修复ubuntu表格中占位符替换的问题
This commit is contained in:
parent
d01f367ffb
commit
91fcd5461d
@ -153,40 +153,64 @@ class DocumentService:
|
|||||||
all_placeholders_in_template.add(field_code)
|
all_placeholders_in_template.add(field_code)
|
||||||
|
|
||||||
# 扫描表格中的占位符
|
# 扫描表格中的占位符
|
||||||
for table in doc.tables:
|
for table_idx, table in enumerate(doc.tables):
|
||||||
try:
|
try:
|
||||||
if not table.rows:
|
if not table.rows:
|
||||||
continue
|
continue
|
||||||
for row in table.rows:
|
|
||||||
|
# 安全地获取表格行数
|
||||||
|
try:
|
||||||
|
row_count = len(table.rows)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] 扫描表格 {table_idx} 时无法获取行数,跳过该表格: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for row_idx, row in enumerate(table.rows):
|
||||||
try:
|
try:
|
||||||
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
||||||
if not hasattr(row, 'cells'):
|
if not hasattr(row, 'cells'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 使用 try-except 包裹,防止 IndexError
|
# 使用 try-except 包裹,防止 IndexError
|
||||||
try:
|
try:
|
||||||
cells = row.cells
|
cells = row.cells
|
||||||
|
if not cells:
|
||||||
|
continue
|
||||||
except (IndexError, AttributeError) as e:
|
except (IndexError, AttributeError) as e:
|
||||||
print(f"[WARN] 无法访问表格行的单元格,跳过该行: {str(e)}")
|
print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 时无法访问单元格,跳过该行: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for cell in cells:
|
for cell_idx, cell in enumerate(cells):
|
||||||
try:
|
try:
|
||||||
if hasattr(cell, 'paragraphs'):
|
if not hasattr(cell, 'paragraphs'):
|
||||||
for paragraph in cell.paragraphs:
|
continue
|
||||||
|
|
||||||
|
# 安全地获取paragraphs列表
|
||||||
|
try:
|
||||||
|
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||||
|
except (IndexError, AttributeError) as e:
|
||||||
|
print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落,跳过: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for paragraph in paragraphs:
|
||||||
|
try:
|
||||||
text = paragraph.text
|
text = paragraph.text
|
||||||
matches = placeholder_pattern.findall(text)
|
matches = placeholder_pattern.findall(text)
|
||||||
for match in matches:
|
for match in matches:
|
||||||
field_code = match.strip()
|
field_code = match.strip()
|
||||||
if field_code:
|
if field_code:
|
||||||
all_placeholders_in_template.add(field_code)
|
all_placeholders_in_template.add(field_code)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 段落时出错,跳过: {str(e)}")
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 处理表格单元格时出错,跳过: {str(e)}")
|
print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时出错,跳过: {str(e)}")
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 处理表格行时出错,跳过: {str(e)}")
|
print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 时出错,跳过: {str(e)}")
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 处理表格时出错,跳过该表格: {str(e)}")
|
print(f"[WARN] 扫描表格 {table_idx} 时出错,跳过该表格: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"[DEBUG] 模板中发现 {len(all_placeholders_in_template)} 个不同的占位符: {sorted(all_placeholders_in_template)}")
|
print(f"[DEBUG] 模板中发现 {len(all_placeholders_in_template)} 个不同的占位符: {sorted(all_placeholders_in_template)}")
|
||||||
@ -411,36 +435,85 @@ class DocumentService:
|
|||||||
|
|
||||||
# 替换表格中的占位符
|
# 替换表格中的占位符
|
||||||
try:
|
try:
|
||||||
for table in doc.tables:
|
for table_idx, table in enumerate(doc.tables):
|
||||||
if not table.rows:
|
try:
|
||||||
continue
|
if not table.rows:
|
||||||
for row in table.rows:
|
|
||||||
if not row.cells:
|
|
||||||
continue
|
continue
|
||||||
for cell in row.cells:
|
|
||||||
|
# 安全地获取表格行数
|
||||||
|
try:
|
||||||
|
row_count = len(table.rows)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] 无法获取表格 {table_idx} 的行数,跳过该表格: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for row_idx, row in enumerate(table.rows):
|
||||||
try:
|
try:
|
||||||
# 检查cell是否有paragraphs属性且不为空
|
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
||||||
if hasattr(cell, 'paragraphs'):
|
if not hasattr(row, 'cells'):
|
||||||
# 安全地获取paragraphs列表
|
continue
|
||||||
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
|
||||||
for paragraph in paragraphs:
|
# 使用 try-except 包裹,防止 IndexError
|
||||||
before_text = paragraph.text
|
try:
|
||||||
replace_placeholder_in_paragraph(paragraph)
|
# 尝试获取cells,如果失败则跳过该行
|
||||||
after_text = paragraph.text
|
cells = row.cells
|
||||||
if before_text != after_text:
|
if not cells:
|
||||||
# 检查哪些占位符被替换了
|
continue
|
||||||
for field_code in field_data.keys():
|
except (IndexError, AttributeError) as e:
|
||||||
placeholder = f"{{{{{field_code}}}}}"
|
print(f"[WARN] 表格 {table_idx} 行 {row_idx} 无法访问单元格,跳过该行: {str(e)}")
|
||||||
if placeholder in before_text and placeholder not in after_text:
|
continue
|
||||||
replaced_placeholders.add(field_code)
|
|
||||||
total_replacements += before_text.count(placeholder)
|
# 安全地遍历单元格
|
||||||
|
for cell_idx, cell in enumerate(cells):
|
||||||
|
try:
|
||||||
|
# 检查cell是否有paragraphs属性且不为空
|
||||||
|
if not hasattr(cell, 'paragraphs'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 安全地获取paragraphs列表
|
||||||
|
try:
|
||||||
|
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||||
|
except (IndexError, AttributeError) as e:
|
||||||
|
print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 无法访问段落,跳过: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for para_idx, paragraph in enumerate(paragraphs):
|
||||||
|
try:
|
||||||
|
before_text = paragraph.text
|
||||||
|
replace_placeholder_in_paragraph(paragraph)
|
||||||
|
after_text = paragraph.text
|
||||||
|
if before_text != after_text:
|
||||||
|
# 检查哪些占位符被替换了
|
||||||
|
for field_code in field_data.keys():
|
||||||
|
placeholder = f"{{{{{field_code}}}}}"
|
||||||
|
if placeholder in before_text and placeholder not in after_text:
|
||||||
|
replaced_placeholders.add(field_code)
|
||||||
|
total_replacements += before_text.count(placeholder)
|
||||||
|
except Exception as e:
|
||||||
|
# 如果单个段落处理失败,记录错误但继续处理其他段落
|
||||||
|
print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 段落 {para_idx} 处理出错: {str(e)}")
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
# 如果单个单元格处理失败,记录错误但继续处理其他单元格
|
||||||
|
print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 处理出错: {str(e)}")
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# 如果单个单元格处理失败,记录错误但继续处理其他单元格
|
# 如果单个行处理失败,记录错误但继续处理其他行
|
||||||
print(f"[WARN] 处理表格单元格时出错: {str(e)}")
|
print(f"[WARN] 表格 {table_idx} 行 {row_idx} 处理出错: {str(e)}")
|
||||||
pass
|
import traceback
|
||||||
|
print(traceback.format_exc())
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
# 如果单个表格处理失败,记录错误但继续处理其他表格
|
||||||
|
print(f"[WARN] 表格 {table_idx} 处理出错: {str(e)}")
|
||||||
|
import traceback
|
||||||
|
print(traceback.format_exc())
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# 如果表格处理失败,记录错误但继续保存文档
|
# 如果表格处理失败,记录错误但继续保存文档
|
||||||
print(f"[WARN] 处理表格时出错: {str(e)}")
|
print(f"[WARN] 处理表格时出错: {str(e)}")
|
||||||
|
import traceback
|
||||||
|
print(traceback.format_exc())
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# 第三步:验证是否还有未替换的占位符(使用正则表达式匹配所有可能的占位符)
|
# 第三步:验证是否还有未替换的占位符(使用正则表达式匹配所有可能的占位符)
|
||||||
@ -454,40 +527,64 @@ class DocumentService:
|
|||||||
remaining_placeholders.add(field_code)
|
remaining_placeholders.add(field_code)
|
||||||
|
|
||||||
# 检查表格中的占位符
|
# 检查表格中的占位符
|
||||||
for table in doc.tables:
|
for table_idx, table in enumerate(doc.tables):
|
||||||
try:
|
try:
|
||||||
if not table.rows:
|
if not table.rows:
|
||||||
continue
|
continue
|
||||||
for row in table.rows:
|
|
||||||
|
# 安全地获取表格行数
|
||||||
|
try:
|
||||||
|
row_count = len(table.rows)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] 验证表格 {table_idx} 时无法获取行数,跳过该表格: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for row_idx, row in enumerate(table.rows):
|
||||||
try:
|
try:
|
||||||
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
||||||
if not hasattr(row, 'cells'):
|
if not hasattr(row, 'cells'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 使用 try-except 包裹,防止 IndexError
|
# 使用 try-except 包裹,防止 IndexError
|
||||||
try:
|
try:
|
||||||
cells = row.cells
|
cells = row.cells
|
||||||
|
if not cells:
|
||||||
|
continue
|
||||||
except (IndexError, AttributeError) as e:
|
except (IndexError, AttributeError) as e:
|
||||||
print(f"[WARN] 无法访问表格行的单元格,跳过该行: {str(e)}")
|
print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 时无法访问单元格,跳过该行: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for cell in cells:
|
for cell_idx, cell in enumerate(cells):
|
||||||
try:
|
try:
|
||||||
if hasattr(cell, 'paragraphs'):
|
if not hasattr(cell, 'paragraphs'):
|
||||||
for paragraph in cell.paragraphs:
|
continue
|
||||||
|
|
||||||
|
# 安全地获取paragraphs列表
|
||||||
|
try:
|
||||||
|
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||||
|
except (IndexError, AttributeError) as e:
|
||||||
|
print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落,跳过: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for paragraph in paragraphs:
|
||||||
|
try:
|
||||||
text = paragraph.text
|
text = paragraph.text
|
||||||
matches = placeholder_pattern.findall(text)
|
matches = placeholder_pattern.findall(text)
|
||||||
for match in matches:
|
for match in matches:
|
||||||
field_code = match.strip()
|
field_code = match.strip()
|
||||||
if field_code:
|
if field_code:
|
||||||
remaining_placeholders.add(field_code)
|
remaining_placeholders.add(field_code)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 段落时出错,跳过: {str(e)}")
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 处理表格单元格时出错,跳过: {str(e)}")
|
print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时出错,跳过: {str(e)}")
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 处理表格行时出错,跳过: {str(e)}")
|
print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 时出错,跳过: {str(e)}")
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] 处理表格时出错,跳过该表格: {str(e)}")
|
print(f"[WARN] 验证表格 {table_idx} 时出错,跳过该表格: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 输出统计信息
|
# 输出统计信息
|
||||||
@ -580,28 +677,50 @@ class DocumentService:
|
|||||||
if field_code:
|
if field_code:
|
||||||
verify_placeholders_in_saved.add(field_code)
|
verify_placeholders_in_saved.add(field_code)
|
||||||
|
|
||||||
for table in verify_doc.tables:
|
for table_idx, table in enumerate(verify_doc.tables):
|
||||||
try:
|
try:
|
||||||
if not table.rows:
|
if not table.rows:
|
||||||
continue
|
continue
|
||||||
for row in table.rows:
|
|
||||||
|
# 安全地获取表格行数
|
||||||
|
try:
|
||||||
|
row_count = len(table.rows)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for row_idx, row in enumerate(table.rows):
|
||||||
try:
|
try:
|
||||||
if not hasattr(row, 'cells'):
|
if not hasattr(row, 'cells'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cells = row.cells
|
cells = row.cells
|
||||||
|
if not cells:
|
||||||
|
continue
|
||||||
except (IndexError, AttributeError):
|
except (IndexError, AttributeError):
|
||||||
continue
|
continue
|
||||||
for cell in cells:
|
|
||||||
|
for cell_idx, cell in enumerate(cells):
|
||||||
try:
|
try:
|
||||||
if hasattr(cell, 'paragraphs'):
|
if not hasattr(cell, 'paragraphs'):
|
||||||
for paragraph in cell.paragraphs:
|
continue
|
||||||
|
|
||||||
|
# 安全地获取paragraphs列表
|
||||||
|
try:
|
||||||
|
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||||
|
except (IndexError, AttributeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for paragraph in paragraphs:
|
||||||
|
try:
|
||||||
text = paragraph.text
|
text = paragraph.text
|
||||||
matches = placeholder_pattern.findall(text)
|
matches = placeholder_pattern.findall(text)
|
||||||
for match in matches:
|
for match in matches:
|
||||||
field_code = match.strip()
|
field_code = match.strip()
|
||||||
if field_code:
|
if field_code:
|
||||||
verify_placeholders_in_saved.add(field_code)
|
verify_placeholders_in_saved.add(field_code)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@ -6,10 +6,14 @@
|
|||||||
|
|
||||||
## 可能的原因
|
## 可能的原因
|
||||||
|
|
||||||
1. **文件保存问题**:在Ubuntu上,文件保存后可能没有正确刷新到磁盘
|
1. **表格处理错误(主要原因)**:在处理表格时出现 `list index out of range` 错误,导致表格中的占位符没有被替换
|
||||||
2. **编码问题**:Windows和Ubuntu在处理文件编码时可能有差异
|
- python-docx 库在处理某些表格结构时可能出现索引越界错误
|
||||||
3. **文件系统同步问题**:Ubuntu上可能需要显式同步文件系统
|
- 错误发生在访问 `row.cells` 时:在扫描和验证步骤中会导致整行被跳过;而在替换步骤中,该访问原先不在内层 try 之内,异常会直接被最外层的 except 捕获,导致后续所有表格的替换都被中断
|
||||||
4. **占位符匹配问题**:可能因为编码或格式问题导致占位符没有被正确识别
|
- 虽然代码有异常处理,但错误处理不够完善,导致表格中的占位符没有被处理
|
||||||
|
2. **文件保存问题**:在Ubuntu上,文件保存后可能没有正确刷新到磁盘
|
||||||
|
3. **编码问题**:Windows和Ubuntu在处理文件编码时可能有差异
|
||||||
|
4. **文件系统同步问题**:Ubuntu上可能需要显式同步文件系统
|
||||||
|
5. **占位符匹配问题**:可能因为编码或格式问题导致占位符没有被正确识别
|
||||||
|
|
||||||
## 修复内容
|
## 修复内容
|
||||||
|
|
||||||
@ -18,19 +22,31 @@
|
|||||||
- 添加了正则表达式匹配作为备用方案,确保能够识别各种格式的占位符
|
- 添加了正则表达式匹配作为备用方案,确保能够识别各种格式的占位符
|
||||||
- 增强了替换逻辑,使用多种方式检查占位符是否存在
|
- 增强了替换逻辑,使用多种方式检查占位符是否存在
|
||||||
|
|
||||||
### 2. 增强文件保存验证
|
### 2. **修复表格处理中的索引越界错误(重要)**
|
||||||
|
|
||||||
|
- **问题**:在处理表格时出现 `list index out of range` 错误,导致表格中的占位符没有被替换
|
||||||
|
- **修复**:
|
||||||
|
- 增强了表格访问的安全性,使用多层异常处理
|
||||||
|
- 安全地访问 `row.cells`,避免索引越界错误
|
||||||
|
- 安全地访问 `cell.paragraphs`,处理异常表格结构
|
||||||
|
- 添加了详细的错误日志,包含表格、行、单元格的索引信息
|
||||||
|
- 即使某个单元格处理失败,也会继续处理其他单元格
|
||||||
|
- **影响范围**:扫描占位符、替换占位符、验证占位符的所有表格处理部分
|
||||||
|
|
||||||
|
### 3. 增强文件保存验证
|
||||||
|
|
||||||
- 保存后验证文件是否存在且大小大于0
|
- 保存后验证文件是否存在且大小大于0
|
||||||
- 在非Windows系统上显式同步文件系统(使用`os.sync()`)
|
- 在非Windows系统上显式同步文件系统(使用`os.sync()`)
|
||||||
- 保存后重新打开文件验证内容是否正确
|
- 保存后重新打开文件验证内容是否正确
|
||||||
|
|
||||||
### 3. 增强调试信息
|
### 4. 增强调试信息
|
||||||
|
|
||||||
- 添加了详细的调试日志,记录每个替换步骤
|
- 添加了详细的调试日志,记录每个替换步骤
|
||||||
- 在替换前后验证占位符是否存在
|
- 在替换前后验证占位符是否存在
|
||||||
- 记录替换的详细信息,便于诊断问题
|
- 记录替换的详细信息,便于诊断问题
|
||||||
|
- 表格处理错误现在包含表格、行、单元格的索引信息
|
||||||
|
|
||||||
### 4. 增强替换后验证
|
### 5. 增强替换后验证
|
||||||
|
|
||||||
- 替换后立即验证段落文本是否还包含占位符
|
- 替换后立即验证段落文本是否还包含占位符
|
||||||
- 如果验证失败,记录详细的错误信息
|
- 如果验证失败,记录详细的错误信息
|
||||||
@ -65,16 +81,19 @@
|
|||||||
- `[DEBUG] 保存前验证:检查文档中是否还有占位符...`
|
- `[DEBUG] 保存前验证:检查文档中是否还有占位符...`
|
||||||
- `[DEBUG] 保存后验证通过:文件中所有占位符已替换`
|
- `[DEBUG] 保存后验证通过:文件中所有占位符已替换`
|
||||||
- `[WARN] 保存后验证:文件中仍有占位符: ...`
|
- `[WARN] 保存后验证:文件中仍有占位符: ...`
|
||||||
|
- **重要**:检查是否还有 `[WARN] 处理表格时出错: list index out of range` 错误
|
||||||
|
- 如果仍然出现,但警告信息中已包含表格、行、单元格的索引(例如 `[WARN] 表格 0 行 2 无法访问单元格,跳过该行: ...`),说明新的错误处理已生效,可据此定位出问题的表格位置
|
||||||
|
- 即使有警告,其余表格、行和单元格中的占位符仍应被正确替换;被跳过的行或单元格中的占位符可能仍未替换,需要根据警告中的索引单独检查
|
||||||
|
|
||||||
2. **测试文档生成**:
|
2. **测试文档生成**:
|
||||||
- 在Ubuntu服务器上生成谈话审批表
|
- 在Ubuntu服务器上生成谈话审批表
|
||||||
- 下载生成的文档,检查占位符是否被正确替换
|
- 下载生成的文档,**特别检查表格中的占位符是否被正确替换**
|
||||||
- 如果仍有问题,查看日志中的警告信息
|
- 如果仍有问题,查看日志中的警告信息,特别是表格处理相关的警告
|
||||||
|
|
||||||
3. **对比测试**:
|
3. **对比测试**:
|
||||||
- 在Windows和Ubuntu上使用相同的数据生成文档
|
- 在Windows和Ubuntu上使用相同的数据生成文档
|
||||||
- 对比生成的文档内容
|
- 对比生成的文档内容,**特别关注表格部分**
|
||||||
- 查看日志中的差异
|
- 查看日志中的差异,确认表格处理是否正常
|
||||||
|
|
||||||
## 如果问题仍然存在
|
## 如果问题仍然存在
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user