From 4d9080855c878c6d7752e6addeb3f1833a319c46 Mon Sep 17 00:00:00 2001
From: python <liji517@qq.com>
Date: Thu, 11 Dec 2025 16:34:50 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=96=87=E6=A1=A3=E6=9C=8D?=
 =?UTF-8?q?=E5=8A=A1=E4=B8=AD=E7=9A=84=E8=A1=A8=E6=A0=BC=E5=A4=84=E7=90=86?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8C=E4=BD=BF=E7=94=A8=E7=B4=A2=E5=BC=95?=
 =?UTF-8?q?=E6=96=B9=E5=BC=8F=E8=AE=BF=E9=97=AE=E8=A1=8C=E5=92=8C=E5=8D=95?=
 =?UTF-8?q?=E5=85=83=E6=A0=BC=E4=BB=A5=E9=81=BF=E5=85=8D=E8=BF=AD=E4=BB=A3?=
 =?UTF-8?q?=E6=97=B6=E7=9A=84=E7=B4=A2=E5=BC=95=E9=94=99=E8=AF=AF=EF=BC=8C?=
 =?UTF-8?q?=E5=90=8C=E6=97=B6=E5=A2=9E=E5=BC=BA=E5=AF=B9=E5=BC=82=E5=B8=B8?=
 =?UTF-8?q?=E6=83=85=E5=86=B5=E7=9A=84=E5=A4=84=E7=90=86=EF=BC=8C=E7=A1=AE?=
 =?UTF-8?q?=E4=BF=9D=E5=9C=A8=E8=AE=BF=E9=97=AE=E8=A1=8C=E3=80=81=E5=8D=95?=
 =?UTF-8?q?=E5=85=83=E6=A0=BC=E5=92=8C=E6=AE=B5=E8=90=BD=E6=97=B6=E7=9A=84?=
 =?UTF-8?q?=E7=A8=B3=E5=AE=9A=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 services/document_service.py | 233 +++++++++++++++++++++++++++++++----
 1 file changed, 212 insertions(+), 21 deletions(-)

diff --git a/services/document_service.py b/services/document_service.py
index 0bff6b7..6f6a865 100644
--- a/services/document_service.py
+++ b/services/document_service.py
@@ -165,15 +165,42 @@ class DocumentService:
                         print(f"[WARN] 扫描表格 {table_idx} 时无法获取行数，跳过该表格: {str(e)}")
                         continue
                     
-                    for row_idx, row in enumerate(table.rows):
+                    # 使用索引方式访问行，而不是迭代器，避免在迭代时触发内部索引访问错误
+                    try:
+                        row_count = len(table.rows)
+                    except Exception:
+                        row_count = 0
+                    
+                    for row_idx in range(row_count):
                         try:
+                            # 使用索引访问行，而不是迭代器
+                            row = table.rows[row_idx]
+                            
                             # 安全地访问 row.cells，避免 docx 库在处理异常表格结构时的 bug
                             if not hasattr(row, 'cells'):
                                 continue
                             
                             # 使用 try-except 包裹，防止 IndexError
                             try:
-                                cells = row.cells
+                                # 先尝试获取cells的数量
+                                try:
+                                    cell_count = len(row.cells)
+                                except (IndexError, AttributeError):
+                                    cell_count = 0
+                                
+                                if cell_count == 0:
+                                    continue
+                                
+                                # 使用索引方式访问cells，而不是迭代器
+                                cells = []
+                                for cell_idx in range(cell_count):
+                                    try:
+                                        cell = row.cells[cell_idx]
+                                        cells.append(cell)
+                                    except (IndexError, AttributeError):
+                                        # 如果某个单元格无法访问，跳过
+                                        continue
+                                
                                 if not cells:
                                     continue
                             except (IndexError, AttributeError) as e:
@@ -187,7 +214,19 @@ class DocumentService:
                                     
                                     # 安全地获取paragraphs列表
                                     try:
-                                        paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                        # 先尝试获取paragraphs的数量
+                                        try:
+                                            para_count = len(cell.paragraphs)
+                                        except (IndexError, AttributeError):
+                                            para_count = 0
+                                        
+                                        paragraphs = []
+                                        for para_idx in range(para_count):
+                                            try:
+                                                para = cell.paragraphs[para_idx]
+                                                paragraphs.append(para)
+                                            except (IndexError, AttributeError):
+                                                continue
                                     except (IndexError, AttributeError) as e:
                                         print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落，跳过: {str(e)}")
                                         continue
@@ -447,16 +486,37 @@ class DocumentService:
                             print(f"[WARN] 无法获取表格 {table_idx} 的行数，跳过该表格: {str(e)}")
                             continue
                         
-                        for row_idx, row in enumerate(table.rows):
+                        # 使用索引方式访问行，而不是迭代器，避免在迭代时触发内部索引访问错误
+                        for row_idx in range(row_count):
                             try:
+                                # 使用索引访问行，而不是迭代器
+                                row = table.rows[row_idx]
+                                
                                 # 安全地访问 row.cells，避免 docx 库在处理异常表格结构时的 bug
                                 if not hasattr(row, 'cells'):
                                     continue
                                 
                                 # 使用 try-except 包裹，防止 IndexError
                                 try:
-                                    # 尝试获取cells，如果失败则跳过该行
-                                    cells = row.cells
+                                    # 先尝试获取cells的数量
+                                    try:
+                                        cell_count = len(row.cells)
+                                    except (IndexError, AttributeError):
+                                        cell_count = 0
+                                    
+                                    if cell_count == 0:
+                                        continue
+                                    
+                                    # 使用索引方式访问cells，而不是迭代器
+                                    cells = []
+                                    for cell_idx in range(cell_count):
+                                        try:
+                                            cell = row.cells[cell_idx]
+                                            cells.append(cell)
+                                        except (IndexError, AttributeError):
+                                            # 如果某个单元格无法访问，跳过
+                                            continue
+                                    
                                     if not cells:
                                         continue
                                 except (IndexError, AttributeError) as e:
@@ -472,7 +532,19 @@ class DocumentService:
                                         
                                         # 安全地获取paragraphs列表
                                         try:
-                                            paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                            # 先尝试获取paragraphs的数量
+                                            try:
+                                                para_count = len(cell.paragraphs)
+                                            except (IndexError, AttributeError):
+                                                para_count = 0
+                                            
+                                            paragraphs = []
+                                            for para_idx in range(para_count):
+                                                try:
+                                                    para = cell.paragraphs[para_idx]
+                                                    paragraphs.append(para)
+                                                except (IndexError, AttributeError):
+                                                    continue
                                         except (IndexError, AttributeError) as e:
                                             print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 无法访问段落，跳过: {str(e)}")
                                             continue
@@ -532,22 +604,43 @@ class DocumentService:
                     if not table.rows:
                         continue
                     
-                    # 安全地获取表格行数
+                    # 安全地获取表格行数，使用索引方式访问行，而不是迭代器
                     try:
                         row_count = len(table.rows)
                     except Exception as e:
-                        print(f"[WARN] 验证表格 {table_idx} 时无法获取行数，跳过该表格: {str(e)}")
+                        print(f"[WARN] 保存前验证表格 {table_idx} 时无法获取行数，跳过该表格: {str(e)}")
                         continue
                     
-                    for row_idx, row in enumerate(table.rows):
+                    for row_idx in range(row_count):
                         try:
+                            # 使用索引访问行，而不是迭代器
+                            row = table.rows[row_idx]
+                            
                             # 安全地访问 row.cells，避免 docx 库在处理异常表格结构时的 bug
                             if not hasattr(row, 'cells'):
                                 continue
                             
                             # 使用 try-except 包裹，防止 IndexError
                             try:
-                                cells = row.cells
+                                # 先尝试获取cells的数量
+                                try:
+                                    cell_count = len(row.cells)
+                                except (IndexError, AttributeError):
+                                    cell_count = 0
+                                
+                                if cell_count == 0:
+                                    continue
+                                
+                                # 使用索引方式访问cells，而不是迭代器
+                                cells = []
+                                for cell_idx in range(cell_count):
+                                    try:
+                                        cell = row.cells[cell_idx]
+                                        cells.append(cell)
+                                    except (IndexError, AttributeError):
+                                        # 如果某个单元格无法访问，跳过
+                                        continue
+                                
                                 if not cells:
                                     continue
                             except (IndexError, AttributeError) as e:
@@ -561,7 +654,19 @@ class DocumentService:
                                     
                                     # 安全地获取paragraphs列表
                                     try:
-                                        paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                        # 先尝试获取paragraphs的数量
+                                        try:
+                                            para_count = len(cell.paragraphs)
+                                        except (IndexError, AttributeError):
+                                            para_count = 0
+                                        
+                                        paragraphs = []
+                                        for para_idx in range(para_count):
+                                            try:
+                                                para = cell.paragraphs[para_idx]
+                                                paragraphs.append(para)
+                                            except (IndexError, AttributeError):
+                                                continue
                                     except (IndexError, AttributeError) as e:
                                         print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落，跳过: {str(e)}")
                                         continue
@@ -613,28 +718,81 @@ class DocumentService:
                     if field_code:
                         verification_placeholders.add(field_code)
             
-            for table in doc.tables:
+            for table_idx, table in enumerate(doc.tables):
                 try:
                     if not table.rows:
                         continue
-                    for row in table.rows:
+                    
+                    # 安全地获取表格行数，使用索引方式访问行
+                    try:
+                        row_count = len(table.rows)
+                    except Exception:
+                        continue
+                    
+                    for row_idx in range(row_count):
                         try:
+                            # 使用索引访问行，而不是迭代器
+                            row = table.rows[row_idx]
+                            
                             if not hasattr(row, 'cells'):
                                 continue
+                            
                             try:
-                                cells = row.cells
+                                # 先尝试获取cells的数量
+                                try:
+                                    cell_count = len(row.cells)
+                                except (IndexError, AttributeError):
+                                    cell_count = 0
+                                
+                                if cell_count == 0:
+                                    continue
+                                
+                                # 使用索引方式访问cells，而不是迭代器
+                                cells = []
+                                for cell_idx in range(cell_count):
+                                    try:
+                                        cell = row.cells[cell_idx]
+                                        cells.append(cell)
+                                    except (IndexError, AttributeError):
+                                        continue
+                                
+                                if not cells:
+                                    continue
                             except (IndexError, AttributeError):
                                 continue
-                            for cell in cells:
+                            
+                            for cell_idx, cell in enumerate(cells):
                                 try:
-                                    if hasattr(cell, 'paragraphs'):
-                                        for paragraph in cell.paragraphs:
+                                    if not hasattr(cell, 'paragraphs'):
+                                        continue
+                                    
+                                    # 安全地获取paragraphs列表
+                                    try:
+                                        try:
+                                            para_count = len(cell.paragraphs)
+                                        except (IndexError, AttributeError):
+                                            para_count = 0
+                                        
+                                        paragraphs = []
+                                        for para_idx in range(para_count):
+                                            try:
+                                                para = cell.paragraphs[para_idx]
+                                                paragraphs.append(para)
+                                            except (IndexError, AttributeError):
+                                                continue
+                                    except (IndexError, AttributeError):
+                                        continue
+                                    
+                                    for paragraph in paragraphs:
+                                        try:
                                             text = paragraph.text
                                             matches = placeholder_pattern.findall(text)
                                             for match in matches:
                                                 field_code = match.strip()
                                                 if field_code:
                                                     verification_placeholders.add(field_code)
+                                        except Exception:
+                                            continue
                                 except Exception:
                                     continue
                         except Exception:
@@ -688,13 +846,34 @@ class DocumentService:
                             except Exception:
                                 continue
                             
-                            for row_idx, row in enumerate(table.rows):
+                            # 使用索引方式访问行，而不是迭代器
+                            for row_idx in range(row_count):
                                 try:
+                                    # 使用索引访问行，而不是迭代器
+                                    row = table.rows[row_idx]
+                                    
                                     if not hasattr(row, 'cells'):
                                         continue
                                     
                                     try:
-                                        cells = row.cells
+                                        # 先尝试获取cells的数量
+                                        try:
+                                            cell_count = len(row.cells)
+                                        except (IndexError, AttributeError):
+                                            cell_count = 0
+                                        
+                                        if cell_count == 0:
+                                            continue
+                                        
+                                        # 使用索引方式访问cells，而不是迭代器
+                                        cells = []
+                                        for cell_idx in range(cell_count):
+                                            try:
+                                                cell = row.cells[cell_idx]
+                                                cells.append(cell)
+                                            except (IndexError, AttributeError):
+                                                continue
+                                        
                                         if not cells:
                                             continue
                                     except (IndexError, AttributeError):
@@ -707,7 +886,19 @@ class DocumentService:
                                             
                                             # 安全地获取paragraphs列表
                                             try:
-                                                paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                                # 先尝试获取paragraphs的数量
+                                                try:
+                                                    para_count = len(cell.paragraphs)
+                                                except (IndexError, AttributeError):
+                                                    para_count = 0
+                                                
+                                                paragraphs = []
+                                                for para_idx in range(para_count):
+                                                    try:
+                                                        para = cell.paragraphs[para_idx]
+                                                        paragraphs.append(para)
+                                                    except (IndexError, AttributeError):
+                                                        continue
                                             except (IndexError, AttributeError):
                                                 continue