优化文档服务中的表格处理逻辑:改用索引方式访问行、单元格和段落,避免迭代时触发索引错误;同时增强异常处理,确保访问行、单元格和段落时的稳定性。
This commit is contained in:
parent
91fcd5461d
commit
4d9080855c
@ -165,15 +165,42 @@ class DocumentService:
|
||||
print(f"[WARN] 扫描表格 {table_idx} 时无法获取行数,跳过该表格: {str(e)}")
|
||||
continue
|
||||
|
||||
for row_idx, row in enumerate(table.rows):
|
||||
# 使用索引方式访问行,而不是迭代器,避免在迭代时触发内部索引访问错误
|
||||
try:
|
||||
row_count = len(table.rows)
|
||||
except Exception:
|
||||
row_count = 0
|
||||
|
||||
for row_idx in range(row_count):
|
||||
try:
|
||||
# 使用索引访问行,而不是迭代器
|
||||
row = table.rows[row_idx]
|
||||
|
||||
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
||||
if not hasattr(row, 'cells'):
|
||||
continue
|
||||
|
||||
# 使用 try-except 包裹,防止 IndexError
|
||||
try:
|
||||
cells = row.cells
|
||||
# 先尝试获取cells的数量
|
||||
try:
|
||||
cell_count = len(row.cells)
|
||||
except (IndexError, AttributeError):
|
||||
cell_count = 0
|
||||
|
||||
if cell_count == 0:
|
||||
continue
|
||||
|
||||
# 使用索引方式访问cells,而不是迭代器
|
||||
cells = []
|
||||
for cell_idx in range(cell_count):
|
||||
try:
|
||||
cell = row.cells[cell_idx]
|
||||
cells.append(cell)
|
||||
except (IndexError, AttributeError):
|
||||
# 如果某个单元格无法访问,跳过
|
||||
continue
|
||||
|
||||
if not cells:
|
||||
continue
|
||||
except (IndexError, AttributeError) as e:
|
||||
@ -187,7 +214,19 @@ class DocumentService:
|
||||
|
||||
# 安全地获取paragraphs列表
|
||||
try:
|
||||
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||
# 先尝试获取paragraphs的数量
|
||||
try:
|
||||
para_count = len(cell.paragraphs)
|
||||
except (IndexError, AttributeError):
|
||||
para_count = 0
|
||||
|
||||
paragraphs = []
|
||||
for para_idx in range(para_count):
|
||||
try:
|
||||
para = cell.paragraphs[para_idx]
|
||||
paragraphs.append(para)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
except (IndexError, AttributeError) as e:
|
||||
print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落,跳过: {str(e)}")
|
||||
continue
|
||||
@ -447,16 +486,37 @@ class DocumentService:
|
||||
print(f"[WARN] 无法获取表格 {table_idx} 的行数,跳过该表格: {str(e)}")
|
||||
continue
|
||||
|
||||
for row_idx, row in enumerate(table.rows):
|
||||
# 使用索引方式访问行,而不是迭代器,避免在迭代时触发内部索引访问错误
|
||||
for row_idx in range(row_count):
|
||||
try:
|
||||
# 使用索引访问行,而不是迭代器
|
||||
row = table.rows[row_idx]
|
||||
|
||||
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
||||
if not hasattr(row, 'cells'):
|
||||
continue
|
||||
|
||||
# 使用 try-except 包裹,防止 IndexError
|
||||
try:
|
||||
# 尝试获取cells,如果失败则跳过该行
|
||||
cells = row.cells
|
||||
# 先尝试获取cells的数量
|
||||
try:
|
||||
cell_count = len(row.cells)
|
||||
except (IndexError, AttributeError):
|
||||
cell_count = 0
|
||||
|
||||
if cell_count == 0:
|
||||
continue
|
||||
|
||||
# 使用索引方式访问cells,而不是迭代器
|
||||
cells = []
|
||||
for cell_idx in range(cell_count):
|
||||
try:
|
||||
cell = row.cells[cell_idx]
|
||||
cells.append(cell)
|
||||
except (IndexError, AttributeError):
|
||||
# 如果某个单元格无法访问,跳过
|
||||
continue
|
||||
|
||||
if not cells:
|
||||
continue
|
||||
except (IndexError, AttributeError) as e:
|
||||
@ -472,7 +532,19 @@ class DocumentService:
|
||||
|
||||
# 安全地获取paragraphs列表
|
||||
try:
|
||||
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||
# 先尝试获取paragraphs的数量
|
||||
try:
|
||||
para_count = len(cell.paragraphs)
|
||||
except (IndexError, AttributeError):
|
||||
para_count = 0
|
||||
|
||||
paragraphs = []
|
||||
for para_idx in range(para_count):
|
||||
try:
|
||||
para = cell.paragraphs[para_idx]
|
||||
paragraphs.append(para)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
except (IndexError, AttributeError) as e:
|
||||
print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 无法访问段落,跳过: {str(e)}")
|
||||
continue
|
||||
@ -532,22 +604,43 @@ class DocumentService:
|
||||
if not table.rows:
|
||||
continue
|
||||
|
||||
# 安全地获取表格行数
|
||||
# 安全地获取表格行数,使用索引方式访问行,而不是迭代器
|
||||
try:
|
||||
row_count = len(table.rows)
|
||||
except Exception as e:
|
||||
print(f"[WARN] 验证表格 {table_idx} 时无法获取行数,跳过该表格: {str(e)}")
|
||||
print(f"[WARN] 保存前验证表格 {table_idx} 时无法获取行数,跳过该表格: {str(e)}")
|
||||
continue
|
||||
|
||||
for row_idx, row in enumerate(table.rows):
|
||||
for row_idx in range(row_count):
|
||||
try:
|
||||
# 使用索引访问行,而不是迭代器
|
||||
row = table.rows[row_idx]
|
||||
|
||||
# 安全地访问 row.cells,避免 docx 库在处理异常表格结构时的 bug
|
||||
if not hasattr(row, 'cells'):
|
||||
continue
|
||||
|
||||
# 使用 try-except 包裹,防止 IndexError
|
||||
try:
|
||||
cells = row.cells
|
||||
# 先尝试获取cells的数量
|
||||
try:
|
||||
cell_count = len(row.cells)
|
||||
except (IndexError, AttributeError):
|
||||
cell_count = 0
|
||||
|
||||
if cell_count == 0:
|
||||
continue
|
||||
|
||||
# 使用索引方式访问cells,而不是迭代器
|
||||
cells = []
|
||||
for cell_idx in range(cell_count):
|
||||
try:
|
||||
cell = row.cells[cell_idx]
|
||||
cells.append(cell)
|
||||
except (IndexError, AttributeError):
|
||||
# 如果某个单元格无法访问,跳过
|
||||
continue
|
||||
|
||||
if not cells:
|
||||
continue
|
||||
except (IndexError, AttributeError) as e:
|
||||
@ -561,7 +654,19 @@ class DocumentService:
|
||||
|
||||
# 安全地获取paragraphs列表
|
||||
try:
|
||||
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||
# 先尝试获取paragraphs的数量
|
||||
try:
|
||||
para_count = len(cell.paragraphs)
|
||||
except (IndexError, AttributeError):
|
||||
para_count = 0
|
||||
|
||||
paragraphs = []
|
||||
for para_idx in range(para_count):
|
||||
try:
|
||||
para = cell.paragraphs[para_idx]
|
||||
paragraphs.append(para)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
except (IndexError, AttributeError) as e:
|
||||
print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落,跳过: {str(e)}")
|
||||
continue
|
||||
@ -613,28 +718,81 @@ class DocumentService:
|
||||
if field_code:
|
||||
verification_placeholders.add(field_code)
|
||||
|
||||
for table in doc.tables:
|
||||
for table_idx, table in enumerate(doc.tables):
|
||||
try:
|
||||
if not table.rows:
|
||||
continue
|
||||
for row in table.rows:
|
||||
|
||||
# 安全地获取表格行数,使用索引方式访问行
|
||||
try:
|
||||
row_count = len(table.rows)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
for row_idx in range(row_count):
|
||||
try:
|
||||
# 使用索引访问行,而不是迭代器
|
||||
row = table.rows[row_idx]
|
||||
|
||||
if not hasattr(row, 'cells'):
|
||||
continue
|
||||
|
||||
try:
|
||||
cells = row.cells
|
||||
# 先尝试获取cells的数量
|
||||
try:
|
||||
cell_count = len(row.cells)
|
||||
except (IndexError, AttributeError):
|
||||
cell_count = 0
|
||||
|
||||
if cell_count == 0:
|
||||
continue
|
||||
|
||||
# 使用索引方式访问cells,而不是迭代器
|
||||
cells = []
|
||||
for cell_idx in range(cell_count):
|
||||
try:
|
||||
cell = row.cells[cell_idx]
|
||||
cells.append(cell)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
|
||||
if not cells:
|
||||
continue
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
for cell in cells:
|
||||
|
||||
for cell_idx, cell in enumerate(cells):
|
||||
try:
|
||||
if hasattr(cell, 'paragraphs'):
|
||||
for paragraph in cell.paragraphs:
|
||||
if not hasattr(cell, 'paragraphs'):
|
||||
continue
|
||||
|
||||
# 安全地获取paragraphs列表
|
||||
try:
|
||||
try:
|
||||
para_count = len(cell.paragraphs)
|
||||
except (IndexError, AttributeError):
|
||||
para_count = 0
|
||||
|
||||
paragraphs = []
|
||||
for para_idx in range(para_count):
|
||||
try:
|
||||
para = cell.paragraphs[para_idx]
|
||||
paragraphs.append(para)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
|
||||
for paragraph in paragraphs:
|
||||
try:
|
||||
text = paragraph.text
|
||||
matches = placeholder_pattern.findall(text)
|
||||
for match in matches:
|
||||
field_code = match.strip()
|
||||
if field_code:
|
||||
verification_placeholders.add(field_code)
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
@ -688,13 +846,34 @@ class DocumentService:
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
for row_idx, row in enumerate(table.rows):
|
||||
# 使用索引方式访问行,而不是迭代器
|
||||
for row_idx in range(row_count):
|
||||
try:
|
||||
# 使用索引访问行,而不是迭代器
|
||||
row = table.rows[row_idx]
|
||||
|
||||
if not hasattr(row, 'cells'):
|
||||
continue
|
||||
|
||||
try:
|
||||
cells = row.cells
|
||||
# 先尝试获取cells的数量
|
||||
try:
|
||||
cell_count = len(row.cells)
|
||||
except (IndexError, AttributeError):
|
||||
cell_count = 0
|
||||
|
||||
if cell_count == 0:
|
||||
continue
|
||||
|
||||
# 使用索引方式访问cells,而不是迭代器
|
||||
cells = []
|
||||
for cell_idx in range(cell_count):
|
||||
try:
|
||||
cell = row.cells[cell_idx]
|
||||
cells.append(cell)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
|
||||
if not cells:
|
||||
continue
|
||||
except (IndexError, AttributeError):
|
||||
@ -707,7 +886,19 @@ class DocumentService:
|
||||
|
||||
# 安全地获取paragraphs列表
|
||||
try:
|
||||
paragraphs = list(cell.paragraphs) if cell.paragraphs else []
|
||||
# 先尝试获取paragraphs的数量
|
||||
try:
|
||||
para_count = len(cell.paragraphs)
|
||||
except (IndexError, AttributeError):
|
||||
para_count = 0
|
||||
|
||||
paragraphs = []
|
||||
for para_idx in range(para_count):
|
||||
try:
|
||||
para = cell.paragraphs[para_idx]
|
||||
paragraphs.append(para)
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
except (IndexError, AttributeError):
|
||||
continue
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user