590 lines
26 KiB
Python
590 lines
26 KiB
Python
"""
|
||
处理已转换的 .docx 模板文档,自动添加占位符
|
||
此脚本专门处理已经手动转换为 .docx 格式的文档,跳过 .doc 转换步骤
|
||
根据占位符与字段对照表,智能识别文档类型并添加相应的占位符
|
||
使用AI大模型智能分析文档内容,识别可替换位置
|
||
"""
|
||
import os
|
||
import re
|
||
from pathlib import Path
|
||
from typing import Dict, List, Optional
|
||
|
||
# python-docx is a hard requirement: abort with a clear message when it is missing.
try:
    from docx import Document
except ImportError:
    # `exit()` is injected by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. `python -S`, frozen builds). Raising SystemExit
    # with a message writes it to stderr and exits with status 1.
    raise SystemExit("错误: 请先安装 python-docx: pip install python-docx")
|
||
|
||
# Optional AI helper: when the module cannot be imported the script falls back
# to the rule-based mode and HAS_AI_HELPER records that fact.
try:
    from template_ai_helper import TemplateAIHelper, get_available_fields_for_document
except ImportError:
    HAS_AI_HELPER = False
    print("警告: 无法导入AI辅助工具,将使用基础模式(不使用AI分析)")
else:
    HAS_AI_HELPER = True
|
||
|
||
# Project root: the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent
# Processed templates are written to <root>/模板; the untouched originals are
# read from its "原始模板" sub-directory.
OUTPUT_TEMPLATES_DIR = PROJECT_ROOT.joinpath("模板")
ORIGINAL_TEMPLATES_DIR = OUTPUT_TEMPLATES_DIR.joinpath("原始模板")
# Markdown table describing the placeholder <-> field correspondence.
FIELD_MAPPING_FILE = PROJECT_ROOT.joinpath("占位符与字段对照表.md")
|
||
|
||
# Document-type mapping, keyed by the name fragment used to recognise a file.
# Each value holds the template code, the field codes that may be turned into
# placeholders ("fields") and the input field codes ("input_fields").


def _template_entry(template_code, fields, input_fields=()):
    """Build one mapping value; every entry gets its own fresh list objects."""
    return {
        "template_code": template_code,
        "fields": list(fields),
        "input_fields": list(input_fields),
    }


# Field set shared by the interview-related documents.
_SUBJECT_PROFILE_FIELDS = (
    "target_name", "target_organization_and_position", "target_gender",
    "target_date_of_birth_full", "target_political_status", "target_address",
    "target_registered_address", "target_contact", "target_place_of_origin",
    "target_ethnicity", "target_id_number", "investigation_team_code",
)

# Field set shared by the two investigation-plan entries.
_INVESTIGATION_PLAN_FIELDS = (
    "target_name", "target_organization_and_position", "target_work_basic_info",
    "target_issue_description", "investigation_unit_name", "investigation_team_leader_name",
    "investigation_team_member_names", "investigation_location",
)

_CLUE_AND_BASIC_INFO = ("clue_info", "target_basic_info_clue")

DOCUMENT_TYPE_MAPPING = {
    "请示报告卡": _template_entry(
        "REPORT_CARD",
        ("target_name", "target_organization_and_position", "report_card_request_time"),
        ("clue_info",),
    ),
    "初步核实审批表": _template_entry(
        "PRELIMINARY_VERIFICATION_APPROVAL",
        (
            "target_name", "target_organization_and_position", "target_gender",
            "target_date_of_birth", "target_political_status", "target_professional_rank",
            "clue_source", "target_issue_description", "department_opinion", "filler_name",
        ),
        _CLUE_AND_BASIC_INFO,
    ),
    "初核方案": _template_entry(
        "INVESTIGATION_PLAN", _INVESTIGATION_PLAN_FIELDS, _CLUE_AND_BASIC_INFO
    ),
    "附件初核方案": _template_entry(
        "INVESTIGATION_PLAN", _INVESTIGATION_PLAN_FIELDS, _CLUE_AND_BASIC_INFO
    ),
    "谈话通知书": _template_entry(
        "NOTIFICATION_LETTER",
        (
            "target_name", "target_organization_and_position", "target_id_number",
            "appointment_time", "appointment_location", "approval_time",
            "handling_department", "handler_name", "notification_time", "notification_location",
        ),
        ("target_basic_info_clue",),
    ),
    "谈话笔录": _template_entry("INTERVIEW_RECORD", _SUBJECT_PROFILE_FIELDS),
    "谈话询问对象情况摸底调查30问": _template_entry(
        "INVESTIGATION_30_QUESTIONS", _SUBJECT_PROFILE_FIELDS
    ),
    "被谈话人权利义务告知书": _template_entry(
        "RIGHTS_OBLIGATIONS_NOTICE", _SUBJECT_PROFILE_FIELDS
    ),
    "点对点交接单": _template_entry("HANDOVER_FORM", _SUBJECT_PROFILE_FIELDS),
    "陪送交接单": _template_entry("ESCORT_HANDOVER_FORM", _SUBJECT_PROFILE_FIELDS),
    "保密承诺书": _template_entry("CONFIDENTIALITY_COMMITMENT", _SUBJECT_PROFILE_FIELDS),
    "办案人员-办案安全保密承诺书": _template_entry(
        "INVESTIGATOR_CONFIDENTIALITY_COMMITMENT", _SUBJECT_PROFILE_FIELDS
    ),
    "请示报告卡(初核报告结论)": _template_entry(
        "REPORT_CARD_CONCLUSION",
        ("investigation_team_code", "target_name", "target_problem_description", "target_attitude"),
    ),
    "初核情况报告": _template_entry(
        "INVESTIGATION_REPORT",
        (
            "target_name", "commission_name", "target_work_basic_info",
            "target_issue_description", "target_problem_description",
            "target_organization_and_position",
        ),
        _CLUE_AND_BASIC_INFO,
    ),
    "谈话审批表": _template_entry(
        "INTERVIEW_APPROVAL_FORM", _SUBJECT_PROFILE_FIELDS, _CLUE_AND_BASIC_INFO
    ),
    "谈话前安全风险评估表": _template_entry(
        "PRE_INTERVIEW_RISK_ASSESSMENT", _SUBJECT_PROFILE_FIELDS, _CLUE_AND_BASIC_INFO
    ),
    "谈话方案": _template_entry(
        "INTERVIEW_PLAN", _SUBJECT_PROFILE_FIELDS, _CLUE_AND_BASIC_INFO
    ),
    "谈话后安全风险评估表": _template_entry(
        "POST_INTERVIEW_RISK_ASSESSMENT", _SUBJECT_PROFILE_FIELDS, _CLUE_AND_BASIC_INFO
    ),
}
|
||
|
||
# Mapping from the field label as it appears in a document to the field code
# used inside placeholders. Several labels may share one code.
FIELD_NAME_TO_CODE = dict([
    # Person under investigation
    ("被核查人姓名", "target_name"),
    ("被核查人员单位及职务", "target_organization_and_position"),
    ("被核查人员性别", "target_gender"),
    ("被核查人员出生年月", "target_date_of_birth"),
    ("被核查人员出生年月日", "target_date_of_birth_full"),
    ("被核查人员政治面貌", "target_political_status"),
    ("被核查人员职级", "target_professional_rank"),
    ("被核查人员身份证号", "target_id_number"),
    ("被核查人员身份证件及号码", "target_id_number"),
    ("被核查人员住址", "target_address"),
    ("被核查人员户籍住址", "target_registered_address"),
    ("被核查人员联系方式", "target_contact"),
    ("被核查人员籍贯", "target_place_of_origin"),
    ("被核查人员民族", "target_ethnicity"),
    # Clue / issue information
    ("线索来源", "clue_source"),
    ("主要问题线索", "target_issue_description"),
    ("被核查人问题描述", "target_problem_description"),
    ("被核查人员工作基本情况", "target_work_basic_info"),
    # Investigation team
    ("核查单位名称", "investigation_unit_name"),
    ("核查组组长姓名", "investigation_team_leader_name"),
    ("核查组成员姓名", "investigation_team_member_names"),
    ("核查地点", "investigation_location"),
    ("核查组代号", "investigation_team_code"),
    # Interview notification
    ("应到时间", "appointment_time"),
    ("应到地点", "appointment_location"),
    ("批准时间", "approval_time"),
    ("承办部门", "handling_department"),
    ("承办人", "handler_name"),
    ("谈话通知时间", "notification_time"),
    ("谈话通知地点", "notification_location"),
    # Form-specific fields
    ("请示报告卡请示时间", "report_card_request_time"),
    ("初步核实审批表承办部门意见", "department_opinion"),
    ("初步核实审批表填表人", "filler_name"),
    ("被核查人员本人认识和态度", "target_attitude"),
    ("纪委名称", "commission_name"),
])
|
||
|
||
|
||
def identify_document_type(file_name: str) -> Optional[Dict]:
    """
    Identify the document type from a file name.

    The extension and the usual decorations ("(XXX)", "XXX", "_转自DOC",
    "转自DOC", "模板") are removed, then the cleaned name is compared with the
    keys of DOCUMENT_TYPE_MAPPING.

    Longer keys are tried first so that a specific type such as
    "办案人员-办案安全保密承诺书" is not shadowed by a shorter key it contains
    ("保密承诺书"). Full-width and ASCII parentheses are treated as equal.

    Args:
        file_name: File name (with or without extension / directory).

    Returns:
        The matching document-type configuration, or None when nothing matches.
    """

    def ascii_parens(value: str) -> str:
        # Map full-width parentheses (U+FF08 / U+FF09) to their ASCII forms.
        return value.replace("\uff08", "(").replace("\uff09", ")")

    # Strip the extension and the common decorations.
    base_name = ascii_parens(Path(file_name).stem)
    for marker in ("(XXX)", "XXX", "_转自DOC", "转自DOC", "模板"):
        base_name = base_name.replace(marker, "")
    base_name = base_name.strip()

    if not base_name:
        # Nothing left to match against.
        return None

    # Longest key first; sorted() is stable, so equally long keys keep their
    # declaration order.
    candidates = sorted(
        ((ascii_parens(doc_type), config) for doc_type, config in DOCUMENT_TYPE_MAPPING.items()),
        key=lambda item: len(item[0]),
        reverse=True,
    )

    # Pass 1: the whole key occurs in the cleaned file name.
    for doc_type, config in candidates:
        if doc_type in base_name:
            return config

    # Pass 2: any parenthesis-separated part of the key (longer than one
    # character) occurs in the cleaned file name.
    for doc_type, config in candidates:
        keywords = doc_type.replace("(", " ").replace(")", " ").split()
        if any(len(keyword) > 1 and keyword in base_name for keyword in keywords):
            return config

    return None
|
||
|
||
|
||
def apply_ai_replacements(text: str, ai_replacements: List[Dict]) -> str:
    """
    Apply the replacement suggestions produced by the AI analysis.

    Suggestions are processed from highest to lowest confidence. Only
    suggestions with a confidence above 0.7 and with non-empty
    'original_text' and 'replacement' values are applied, and only the first
    occurrence of each original text is replaced.

    Args:
        text: Original text.
        ai_replacements: Suggestion dicts with the keys 'original_text',
            'replacement' and 'confidence'. None or an empty list leaves the
            text unchanged.

    Returns:
        The text after the accepted replacements were applied.
    """
    if not ai_replacements:
        return text

    def confidence_of(suggestion: Dict) -> float:
        # A missing or None confidence counts as 0 (never applied).
        return suggestion.get('confidence') or 0

    result_text = text
    for suggestion in sorted(ai_replacements, key=confidence_of, reverse=True):
        original = suggestion.get('original_text', '')
        replacement_text = suggestion.get('replacement', '')

        if confidence_of(suggestion) <= 0.7 or not original or not replacement_text:
            continue

        # str.replace works on the literal text, so no regex escaping is
        # needed. Testing for an escaped string would miss every original
        # containing characters such as "(", "." or "-".
        result_text = result_text.replace(original, replacement_text, 1)

    return result_text
|
||
|
||
|
||
def _create_ai_helper(doc_config: Dict) -> tuple:
    """Create and connection-test the AI helper; return (helper, available_fields) or (None, [])."""
    try:
        print(" [初始化] 正在初始化AI助手...")
        ai_helper = TemplateAIHelper()

        # Test the API connection before relying on the helper.
        if not ai_helper.test_api_connection():
            print(" [初始化] ⚠ API连接测试失败,将使用基础模式")
            return None, []

        available_fields = get_available_fields_for_document(doc_config, FIELD_NAME_TO_CODE)
        print(f" [初始化] ✓ AI分析已启用(可用字段: {len(available_fields)} 个)")
        return ai_helper, available_fields
    except Exception as e:
        # Best effort: any failure here only disables the AI pass.
        print(f" [初始化] ⚠ AI分析不可用: {e},将使用基础模式")
        import traceback
        traceback.print_exc()
        return None, []


def _compile_field_rules(field_codes: List[str]) -> List[tuple]:
    """Compile, once per document, the label-based patterns for every label of each field code."""
    rules = []
    for field_code in field_codes:
        placeholder = "{{" + field_code + "}}"
        # Several labels can map to the same code (aliases); all of them are used.
        for field_name, code in FIELD_NAME_TO_CODE.items():
            if code != field_code:
                continue
            label = re.escape(field_name)
            # Pattern 1: "<label>: <value>" up to whitespace, end of text, a
            # comma or a full stop. \uff1a / \uff0c are the full-width colon
            # and comma.
            value_pattern = re.compile(
                rf"({label}[:\uff1a]\s*)([^\n\r{{]+?)(\s|$|,|\uff0c|。)"
            )
            # Pattern 2: "<label>: XXX" style markers. The longer alternative
            # "待填写" must come before "待填" or it can never match.
            marker_pattern = re.compile(
                rf"({label}[:\uff1a]\s*)(XXX|xxx|待填写|待填)"
            )
            rules.append((placeholder, value_pattern, marker_pattern))
    return rules


def _apply_field_rules(text: str, rules: List[tuple]) -> str:
    """Replace label values / markers in *text* with their placeholders."""
    blank_marks = ("——", "—", "-")
    for placeholder, value_pattern, marker_pattern in rules:

        def fill_value(match, placeholder=placeholder):
            value = match.group(2).strip()
            # Leave empty values and dash-only "blank" marks untouched. The
            # value can never contain "{" (excluded by the pattern), so an
            # existing placeholder is never matched here.
            if value and value not in blank_marks:
                return f"{match.group(1)}{placeholder}{match.group(3)}"
            return match.group(0)

        text = value_pattern.sub(fill_value, text)
        text = marker_pattern.sub(
            lambda match, placeholder=placeholder: match.group(1) + placeholder, text
        )
    return text


def _rewrite_paragraph(paragraph, text: str, rules: List[tuple], analyze, where: str) -> tuple:
    """Run the AI pass (if any) and the rule pass on one paragraph; return (changed, ai_suggestion_count)."""
    original_text = text
    ai_hits = 0

    if analyze is not None:
        try:
            suggestions = analyze(text)
            if suggestions:
                text = apply_ai_replacements(text, suggestions)
                if text != original_text:
                    # Counts the suggestions returned for a paragraph that
                    # changed; apply_ai_replacements does not report how many
                    # of them were actually applied.
                    ai_hits = len(suggestions)
        except Exception as e:
            # Best effort: report the failure and fall through to the rules.
            print(f" [AI] ⚠ {where} AI分析失败: {e}")

    # Rule matching runs after the AI pass, as a supplement.
    text = _apply_field_rules(text, rules)

    if text == original_text:
        return False, ai_hits

    # NOTE(review): clear() + add_run() replaces the paragraph content with a
    # single run; per python-docx, paragraph-level formatting is kept, while
    # the formatting of the original runs is presumably lost - confirm that
    # this is acceptable for the templates.
    paragraph.clear()
    paragraph.add_run(text)
    return True, ai_hits


def process_document(input_path: Path, output_path: Path, doc_config: Dict, use_ai: bool = True) -> bool:
    """
    Process a single document and add placeholders.

    Body paragraphs and table-cell paragraphs are first passed to the AI
    helper (when enabled and reachable) and then to the label-based rules
    built from doc_config['fields'] and FIELD_NAME_TO_CODE. The result is
    saved to output_path (parent directories are created).

    Args:
        input_path: Input file path (.docx).
        output_path: Output file path.
        doc_config: Document configuration ('template_code', 'fields', ...).
        use_ai: Whether to use the AI analysis (default True).

    Returns:
        True when the document was processed and saved; False when the file
        is not a .docx, does not exist, or any error occurred (errors are
        printed, not raised).
    """
    try:
        # Only .docx files are handled.
        if input_path.suffix.lower() != '.docx':
            print(f" ⚠ 跳过: 不是 .docx 文件 ({input_path.suffix})")
            return False

        if not input_path.exists():
            print(f" ✗ 错误: 文件不存在: {input_path}")
            return False

        print(f" 处理: {input_path.name}")

        # Initialise the AI helper (if available).
        ai_helper, available_fields = None, []
        if use_ai and HAS_AI_HELPER:
            ai_helper, available_fields = _create_ai_helper(doc_config)
        ai_enabled = bool(ai_helper and available_fields)
        doc_type = doc_config.get('template_code', '未知')

        # Patterns are compiled once per document instead of once per paragraph.
        rules = _compile_field_rules(doc_config.get('fields', []))

        print(" [读取] 正在打开文档...")
        doc = Document(str(input_path))

        # Statistics.
        total_paragraphs = sum(1 for p in doc.paragraphs if p.text.strip())
        total_tables = len(doc.tables)
        total_cells = sum(
            len(table.rows) * len(table.rows[0].cells) if table.rows else 0
            for table in doc.tables
        )
        print(f" [统计] 文档包含: {total_paragraphs} 个段落, {total_tables} 个表格, 约 {total_cells} 个单元格")

        replacement_count = 0
        ai_replacement_count = 0

        analyze_paragraph = None
        if ai_enabled:
            def analyze_paragraph(text):
                return ai_helper.analyze_paragraph(text, available_fields, doc_type)

        # Body paragraphs.
        print(" [处理] 开始处理段落...")
        visited = 0
        for para_idx, paragraph in enumerate(doc.paragraphs):
            text = paragraph.text
            if not text.strip():
                continue

            # Progress counts non-blank paragraphs, matching total_paragraphs.
            visited += 1
            if ai_enabled and visited % 10 == 1:
                print(f" [进度] 处理段落 {visited}/{total_paragraphs}...")

            changed, ai_hits = _rewrite_paragraph(
                paragraph, text, rules, analyze_paragraph, f"段落 {para_idx+1}"
            )
            if ai_hits:
                ai_replacement_count += ai_hits
                print(f" [AI] 段落 {para_idx+1} 应用了 {ai_hits} 个替换")
            if changed:
                replacement_count += 1

        # Table cells.
        print(" [处理] 开始处理表格...")
        for table_idx, table in enumerate(doc.tables):
            if table_idx % 5 == 0:  # progress every 5 tables
                print(f" [进度] 处理表格 {table_idx+1}/{total_tables}...")
            for row_idx, row in enumerate(table.rows):
                for col_idx, cell in enumerate(row.cells):
                    analyze_cell = None
                    if ai_enabled:
                        def analyze_cell(text, row_idx=row_idx, col_idx=col_idx):
                            return ai_helper.analyze_table_cell(
                                text, available_fields, doc_type, row_idx, col_idx
                            )

                    where = f"表格 {table_idx+1} 单元格({row_idx+1},{col_idx+1})"
                    for paragraph in cell.paragraphs:
                        text = paragraph.text
                        if not text.strip():
                            continue
                        changed, ai_hits = _rewrite_paragraph(
                            paragraph, text, rules, analyze_cell, where
                        )
                        ai_replacement_count += ai_hits
                        if changed:
                            replacement_count += 1

        # Make sure the output directory exists, then save.
        print(" [保存] 正在保存文档...")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path))
        print(f" [保存] ✓ 文档已保存到: {output_path}")

        if replacement_count > 0 or ai_replacement_count > 0:
            msg = " ✓ 处理成功"
            if ai_replacement_count > 0:
                msg += f",AI识别 {ai_replacement_count} 处"
            if replacement_count > 0:
                msg += f",规则匹配 {replacement_count} 处"
            print(msg)
        else:
            print(" ⚠ 处理完成,但未找到需要替换的内容(可能已包含占位符)")

        return True

    except Exception as e:
        # Top-level boundary for one document: report and signal failure so
        # the batch can continue with the next file.
        print(f" ✗ 处理失败: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||
|
||
|
||
def process_all_templates():
    """
    Process every converted .docx template below ORIGINAL_TEMPLATES_DIR.

    Each file whose type can be identified from its name is passed to
    process_document(); the result is written below OUTPUT_TEMPLATES_DIR,
    keeping the relative directory structure and using a cleaned file name.
    A summary is printed at the end. Returns None.
    """
    print("=" * 80)
    print("处理已转换的 .docx 模板文档(跳过 .doc 转换)")
    print("=" * 80)
    print()

    if not ORIGINAL_TEMPLATES_DIR.exists():
        print(f"错误: 原始模板目录不存在: {ORIGINAL_TEMPLATES_DIR}")
        return

    # Collect the files once. The extension check is case-insensitive (as in
    # process_document) and Word lock files ("~$name.docx", created while a
    # document is open) are ignored. Sorting gives a stable processing order.
    all_files = sorted(
        Path(root) / name
        for root, _dirs, names in os.walk(ORIGINAL_TEMPLATES_DIR)
        for name in names
        if name.lower().endswith('.docx') and not name.startswith('~$')
    )

    total_files = len(all_files)
    print(f"找到 {total_files} 个 .docx 文件需要处理\n")

    processed_count = 0
    skipped_count = 0
    failed_count = 0

    for file_index, input_path in enumerate(all_files, start=1):
        file = input_path.name

        # Identify the document type from the file name.
        doc_config = identify_document_type(file)
        if not doc_config:
            print(f"\n⚠ 无法识别文档类型: {file}")
            print(f" 路径: {input_path}")
            skipped_count += 1
            continue

        # Build the output path (same relative directory, cleaned file name).
        relative_path = input_path.relative_to(ORIGINAL_TEMPLATES_DIR)
        clean_name = input_path.stem
        # "\uff08XXX\uff09" is "(XXX)" written with full-width parentheses.
        for marker in ("_转自DOC", "转自DOC", "(XXX)", "\uff08XXX\uff09", "XXX"):
            clean_name = clean_name.replace(marker, "")
        # Avoid stray whitespace around the name; never produce an empty name.
        clean_name = clean_name.strip() or input_path.stem
        output_path = OUTPUT_TEMPLATES_DIR / relative_path.parent / f"{clean_name}.docx"

        print(f"\n{'='*80}")
        print(f"[{file_index}/{total_files}] 处理: {file}")
        print(f"{'='*80}")
        print(f" 类型: {doc_config.get('template_code', 'UNKNOWN')}")
        print(f" 输入: {input_path}")
        print(f" 输出: {output_path}")

        # Process the document (with AI analysis).
        if process_document(input_path, output_path, doc_config, use_ai=True):
            processed_count += 1
        else:
            failed_count += 1

    # Summary.
    print("\n" + "=" * 80)
    print("处理完成")
    print("=" * 80)
    print(f"成功处理: {processed_count} 个文件")
    print(f"跳过: {skipped_count} 个文件(无法识别类型)")
    print(f"失败: {failed_count} 个文件")
    print(f"\n处理后的模板保存在: {OUTPUT_TEMPLATES_DIR}")
    print("\n请检查生成的模板文件,确认占位符是否正确添加。")
    print("如有需要,请手动调整占位符位置。")
|
||
|
||
|
||
# Script entry point: process all templates when run directly (not on import).
if __name__ == "__main__":
    process_all_templates()
|