From 91fcd5461dd13581a1f87aca9db37f1fd70c1b55 Mon Sep 17 00:00:00 2001
From: python <liji517@qq.com>
Date: Thu, 11 Dec 2025 16:30:42 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dubuntu=E8=A1=A8=E6=A0=BC?=
 =?UTF-8?q?=E4=B8=AD=E5=8D=A0=E4=BD=8D=E7=AC=A6=E6=9B=BF=E6=8D=A2=E7=9A=84?=
 =?UTF-8?q?=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 services/document_service.py | 213 +++++++++++++++++++++++++++--------
 修复Ubuntu占位符替换问题.md  |  41 +++++--
 2 files changed, 196 insertions(+), 58 deletions(-)

diff --git a/services/document_service.py b/services/document_service.py
index 60ed11d..0bff6b7 100644
--- a/services/document_service.py
+++ b/services/document_service.py
@@ -153,40 +153,64 @@ class DocumentService:
                         all_placeholders_in_template.add(field_code)
             
             # 扫描表格中的占位符
-            for table in doc.tables:
+            for table_idx, table in enumerate(doc.tables):
                 try:
                     if not table.rows:
                         continue
-                    for row in table.rows:
+                    
+                    # 安全地获取表格行数
+                    try:
+                        row_count = len(table.rows)
+                    except Exception as e:
+                        print(f"[WARN] 扫描表格 {table_idx} 时无法获取行数，跳过该表格: {str(e)}")
+                        continue
+                    
+                    for row_idx, row in enumerate(table.rows):
                         try:
                             # 安全地访问 row.cells，避免 docx 库在处理异常表格结构时的 bug
                             if not hasattr(row, 'cells'):
                                 continue
+                            
                             # 使用 try-except 包裹，防止 IndexError
                             try:
                                 cells = row.cells
+                                if not cells:
+                                    continue
                             except (IndexError, AttributeError) as e:
-                                print(f"[WARN] 无法访问表格行的单元格，跳过该行: {str(e)}")
+                                print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 时无法访问单元格，跳过该行: {str(e)}")
                                 continue
                             
-                            for cell in cells:
+                            for cell_idx, cell in enumerate(cells):
                                 try:
-                                    if hasattr(cell, 'paragraphs'):
-                                        for paragraph in cell.paragraphs:
+                                    if not hasattr(cell, 'paragraphs'):
+                                        continue
+                                    
+                                    # 安全地获取paragraphs列表
+                                    try:
+                                        paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                    except (IndexError, AttributeError) as e:
+                                        print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落，跳过: {str(e)}")
+                                        continue
+                                    
+                                    for paragraph in paragraphs:
+                                        try:
                                             text = paragraph.text
                                             matches = placeholder_pattern.findall(text)
                                             for match in matches:
                                                 field_code = match.strip()
                                                 if field_code:
                                                     all_placeholders_in_template.add(field_code)
+                                        except Exception as e:
+                                            print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 段落时出错，跳过: {str(e)}")
+                                            continue
                                 except Exception as e:
-                                    print(f"[WARN] 处理表格单元格时出错，跳过: {str(e)}")
+                                    print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时出错，跳过: {str(e)}")
                                     continue
                         except Exception as e:
-                            print(f"[WARN] 处理表格行时出错，跳过: {str(e)}")
+                            print(f"[WARN] 扫描表格 {table_idx} 行 {row_idx} 时出错，跳过: {str(e)}")
                             continue
                 except Exception as e:
-                    print(f"[WARN] 处理表格时出错，跳过该表格: {str(e)}")
+                    print(f"[WARN] 扫描表格 {table_idx} 时出错，跳过该表格: {str(e)}")
                     continue
             
             print(f"[DEBUG] 模板中发现 {len(all_placeholders_in_template)} 个不同的占位符: {sorted(all_placeholders_in_template)}")
@@ -411,36 +435,85 @@ class DocumentService:
             
             # 替换表格中的占位符
             try:
-                for table in doc.tables:
-                    if not table.rows:
-                        continue
-                    for row in table.rows:
-                        if not row.cells:
+                for table_idx, table in enumerate(doc.tables):
+                    try:
+                        if not table.rows:
                             continue
-                        for cell in row.cells:
+                        
+                        # 安全地获取表格行数
+                        try:
+                            row_count = len(table.rows)
+                        except Exception as e:
+                            print(f"[WARN] 无法获取表格 {table_idx} 的行数，跳过该表格: {str(e)}")
+                            continue
+                        
+                        for row_idx, row in enumerate(table.rows):
                             try:
-                                # 检查cell是否有paragraphs属性且不为空
-                                if hasattr(cell, 'paragraphs'):
-                                    # 安全地获取paragraphs列表
-                                    paragraphs = list(cell.paragraphs) if cell.paragraphs else []
-                                    for paragraph in paragraphs:
-                                        before_text = paragraph.text
-                                        replace_placeholder_in_paragraph(paragraph)
-                                        after_text = paragraph.text
-                                        if before_text != after_text:
-                                            # 检查哪些占位符被替换了
-                                            for field_code in field_data.keys():
-                                                placeholder = f"{{{{{field_code}}}}}"
-                                                if placeholder in before_text and placeholder not in after_text:
-                                                    replaced_placeholders.add(field_code)
-                                                    total_replacements += before_text.count(placeholder)
+                                # 安全地访问 row.cells，避免 docx 库在处理异常表格结构时的 bug
+                                if not hasattr(row, 'cells'):
+                                    continue
+                                
+                                # 使用 try-except 包裹，防止 IndexError
+                                try:
+                                    # 尝试获取cells，如果失败则跳过该行
+                                    cells = row.cells
+                                    if not cells:
+                                        continue
+                                except (IndexError, AttributeError) as e:
+                                    print(f"[WARN] 表格 {table_idx} 行 {row_idx} 无法访问单元格，跳过该行: {str(e)}")
+                                    continue
+                                
+                                # 安全地遍历单元格
+                                for cell_idx, cell in enumerate(cells):
+                                    try:
+                                        # 检查cell是否有paragraphs属性且不为空
+                                        if not hasattr(cell, 'paragraphs'):
+                                            continue
+                                        
+                                        # 安全地获取paragraphs列表
+                                        try:
+                                            paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                        except (IndexError, AttributeError) as e:
+                                            print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 无法访问段落，跳过: {str(e)}")
+                                            continue
+                                        
+                                        for para_idx, paragraph in enumerate(paragraphs):
+                                            try:
+                                                before_text = paragraph.text
+                                                replace_placeholder_in_paragraph(paragraph)
+                                                after_text = paragraph.text
+                                                if before_text != after_text:
+                                                    # 检查哪些占位符被替换了
+                                                    for field_code in field_data.keys():
+                                                        placeholder = f"{{{{{field_code}}}}}"
+                                                        if placeholder in before_text and placeholder not in after_text:
+                                                            replaced_placeholders.add(field_code)
+                                                            total_replacements += before_text.count(placeholder)
+                                            except Exception as e:
+                                                # 如果单个段落处理失败，记录错误但继续处理其他段落
+                                                print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 段落 {para_idx} 处理出错: {str(e)}")
+                                                continue
+                                    except Exception as e:
+                                        # 如果单个单元格处理失败，记录错误但继续处理其他单元格
+                                        print(f"[WARN] 表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 处理出错: {str(e)}")
+                                        continue
                             except Exception as e:
-                                # 如果单个单元格处理失败，记录错误但继续处理其他单元格
-                                print(f"[WARN] 处理表格单元格时出错: {str(e)}")
-                                pass
+                                # 如果单个行处理失败，记录错误但继续处理其他行
+                                print(f"[WARN] 表格 {table_idx} 行 {row_idx} 处理出错: {str(e)}")
+                                import traceback
+                                print(traceback.format_exc())
+                                continue
+                    except Exception as e:
+                        # 如果单个表格处理失败，记录错误但继续处理其他表格
+                        print(f"[WARN] 表格 {table_idx} 处理出错: {str(e)}")
+                        import traceback
+                        print(traceback.format_exc())
+                        continue
             except Exception as e:
                 # 如果表格处理失败，记录错误但继续保存文档
                 print(f"[WARN] 处理表格时出错: {str(e)}")
+                import traceback
+                print(traceback.format_exc())
                 pass
             
             # 第三步：验证是否还有未替换的占位符（使用正则表达式匹配所有可能的占位符）
@@ -454,40 +527,64 @@ class DocumentService:
                         remaining_placeholders.add(field_code)
             
             # 检查表格中的占位符
-            for table in doc.tables:
+            for table_idx, table in enumerate(doc.tables):
                 try:
                     if not table.rows:
                         continue
-                    for row in table.rows:
+                    
+                    # 安全地获取表格行数
+                    try:
+                        row_count = len(table.rows)
+                    except Exception as e:
+                        print(f"[WARN] 验证表格 {table_idx} 时无法获取行数，跳过该表格: {str(e)}")
+                        continue
+                    
+                    for row_idx, row in enumerate(table.rows):
                         try:
                             # 安全地访问 row.cells，避免 docx 库在处理异常表格结构时的 bug
                             if not hasattr(row, 'cells'):
                                 continue
+                            
                             # 使用 try-except 包裹，防止 IndexError
                             try:
                                 cells = row.cells
+                                if not cells:
+                                    continue
                             except (IndexError, AttributeError) as e:
-                                print(f"[WARN] 无法访问表格行的单元格，跳过该行: {str(e)}")
+                                print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 时无法访问单元格，跳过该行: {str(e)}")
                                 continue
                             
-                            for cell in cells:
+                            for cell_idx, cell in enumerate(cells):
                                 try:
-                                    if hasattr(cell, 'paragraphs'):
-                                        for paragraph in cell.paragraphs:
+                                    if not hasattr(cell, 'paragraphs'):
+                                        continue
+                                    
+                                    # 安全地获取paragraphs列表
+                                    try:
+                                        paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                    except (IndexError, AttributeError) as e:
+                                        print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时无法访问段落，跳过: {str(e)}")
+                                        continue
+                                    
+                                    for paragraph in paragraphs:
+                                        try:
                                             text = paragraph.text
                                             matches = placeholder_pattern.findall(text)
                                             for match in matches:
                                                 field_code = match.strip()
                                                 if field_code:
                                                     remaining_placeholders.add(field_code)
+                                        except Exception as e:
+                                            print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 段落时出错，跳过: {str(e)}")
+                                            continue
                                 except Exception as e:
-                                    print(f"[WARN] 处理表格单元格时出错，跳过: {str(e)}")
+                                    print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 单元格 {cell_idx} 时出错，跳过: {str(e)}")
                                     continue
                         except Exception as e:
-                            print(f"[WARN] 处理表格行时出错，跳过: {str(e)}")
+                            print(f"[WARN] 验证表格 {table_idx} 行 {row_idx} 时出错，跳过: {str(e)}")
                             continue
                 except Exception as e:
-                    print(f"[WARN] 处理表格时出错，跳过该表格: {str(e)}")
+                    print(f"[WARN] 验证表格 {table_idx} 时出错，跳过该表格: {str(e)}")
                     continue
             
             # 输出统计信息
@@ -580,28 +677,50 @@ class DocumentService:
                             if field_code:
                                 verify_placeholders_in_saved.add(field_code)
                     
-                    for table in verify_doc.tables:
+                    for table_idx, table in enumerate(verify_doc.tables):
                         try:
                             if not table.rows:
                                 continue
-                            for row in table.rows:
+                            
+                            # 安全地获取表格行数
+                            try:
+                                row_count = len(table.rows)
+                            except Exception:
+                                continue
+                            
+                            for row_idx, row in enumerate(table.rows):
                                 try:
                                     if not hasattr(row, 'cells'):
                                         continue
+                                    
                                     try:
                                         cells = row.cells
+                                        if not cells:
+                                            continue
                                     except (IndexError, AttributeError):
                                         continue
-                                    for cell in cells:
+                                    
+                                    for cell_idx, cell in enumerate(cells):
                                         try:
-                                            if hasattr(cell, 'paragraphs'):
-                                                for paragraph in cell.paragraphs:
+                                            if not hasattr(cell, 'paragraphs'):
+                                                continue
+                                            
+                                            # 安全地获取paragraphs列表
+                                            try:
+                                                paragraphs = list(cell.paragraphs) if cell.paragraphs else []
+                                            except (IndexError, AttributeError):
+                                                continue
+                                            
+                                            for paragraph in paragraphs:
+                                                try:
                                                     text = paragraph.text
                                                     matches = placeholder_pattern.findall(text)
                                                     for match in matches:
                                                         field_code = match.strip()
                                                         if field_code:
                                                             verify_placeholders_in_saved.add(field_code)
+                                                except Exception:
+                                                    continue
                                         except Exception:
                                             continue
                                 except Exception:
diff --git a/修复Ubuntu占位符替换问题.md b/修复Ubuntu占位符替换问题.md
index d2f446d..e6b63fb 100644
--- a/修复Ubuntu占位符替换问题.md
+++ b/修复Ubuntu占位符替换问题.md
@@ -6,10 +6,14 @@
 
 ## 可能的原因
 
-1. **文件保存问题**：在Ubuntu上，文件保存后可能没有正确刷新到磁盘
-2. **编码问题**：Windows和Ubuntu在处理文件编码时可能有差异
-3. **文件系统同步问题**：Ubuntu上可能需要显式同步文件系统
-4. **占位符匹配问题**：可能因为编码或格式问题导致占位符没有被正确识别
+1. **表格处理错误（主要原因）**：在处理表格时出现 `list index out of range` 错误，导致表格中的占位符没有被替换
+   - python-docx 库在处理某些表格结构时可能出现索引越界错误
+   - 错误发生在访问 `row.cells` 时：扫描和验证逻辑会跳过出错的行，而旧的替换逻辑没有在行级别捕获该异常
+   - 虽然代码有异常处理，但替换逻辑中的异常只在最外层被捕获，整个表格替换循环随即中止，导致后续行和后续表格中的占位符都没有被处理
+2. **文件保存问题**：在Ubuntu上，文件保存后可能没有正确刷新到磁盘
+3. **编码问题**：Windows和Ubuntu在处理文件编码时可能有差异
+4. **文件系统同步问题**：Ubuntu上可能需要显式同步文件系统
+5. **占位符匹配问题**：可能因为编码或格式问题导致占位符没有被正确识别
 
 ## 修复内容
 
@@ -18,19 +22,31 @@
 - 添加了正则表达式匹配作为备用方案，确保能够识别各种格式的占位符
 - 增强了替换逻辑，使用多种方式检查占位符是否存在
 
-### 2. 增强文件保存验证
+### 2. **修复表格处理中的索引越界错误（重要）**
+
+- **问题**：在处理表格时出现 `list index out of range` 错误，导致表格中的占位符没有被替换
+- **修复**：
+  - 增强了表格访问的安全性，使用多层异常处理
+  - 安全地访问 `row.cells`，避免索引越界错误
+  - 安全地访问 `cell.paragraphs`，处理异常表格结构
+  - 添加了详细的错误日志，包含表格、行、单元格的索引信息
+  - 即使某个单元格处理失败，也会继续处理其他单元格
+- **影响范围**：扫描占位符、替换占位符、验证占位符的所有表格处理部分
+
+### 3. 增强文件保存验证
 
 - 保存后验证文件是否存在且大小大于0
 - 在非Windows系统上显式同步文件系统（使用`os.sync()`）
 - 保存后重新打开文件验证内容是否正确
 
-### 3. 增强调试信息
+### 4. 增强调试信息
 
 - 添加了详细的调试日志，记录每个替换步骤
 - 在替换前后验证占位符是否存在
 - 记录替换的详细信息，便于诊断问题
+- 表格处理错误现在包含表格、行、单元格的索引信息
 
-### 4. 增强替换后验证
+### 5. 增强替换后验证
 
 - 替换后立即验证段落文本是否还包含占位符
 - 如果验证失败，记录详细的错误信息
@@ -65,16 +81,19 @@
      - `[DEBUG] 保存前验证：检查文档中是否还有占位符...`
      - `[DEBUG] 保存后验证通过：文件中所有占位符已替换`
      - `[WARN] 保存后验证：文件中仍有占位符: ...`
+   - **重要**：检查是否还有 `[WARN] 处理表格时出错: list index out of range` 错误
+     - 如果仍有类似警告，但警告信息中已包含表格、行、单元格的索引（例如 `[WARN] 表格 0 行 2 无法访问单元格，跳过该行: ...`），说明新的错误处理已经生效
+     - 即使出现警告，也只会跳过出错的行或单元格，表格中其余位置的占位符仍会被正常替换；被跳过位置的占位符是否残留，需结合“保存后验证”的日志确认
 
 2. **测试文档生成**：
    - 在Ubuntu服务器上生成谈话审批表
-   - 下载生成的文档，检查占位符是否被正确替换
-   - 如果仍有问题，查看日志中的警告信息
+   - 下载生成的文档，**特别检查表格中的占位符是否被正确替换**
+   - 如果仍有问题，查看日志中的警告信息，特别是表格处理相关的警告
 
 3. **对比测试**：
    - 在Windows和Ubuntu上使用相同的数据生成文档
-   - 对比生成的文档内容
-   - 查看日志中的差异
+   - 对比生成的文档内容，**特别关注表格部分**
+   - 查看日志中的差异，确认表格处理是否正常
 
 ## 如果问题仍然存在