From d9df013e33a02c288d8503b2cdce11b2a19a61b8 Mon Sep 17 00:00:00 2001
From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com>
Date: Mon, 6 Jan 2025 14:37:51 +0800
Subject: [PATCH] fix: Part of the docx document is parsed incorrectly (#1981)

---
 apps/common/handle/impl/doc_split_handle.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py
index d377abeba..4170eb70e 100644
--- a/apps/common/handle/impl/doc_split_handle.py
+++ b/apps/common/handle/impl/doc_split_handle.py
@@ -113,8 +113,10 @@ class DocSplitHandle(BaseSplitHandle):
     def paragraph_to_md(paragraph: Paragraph, doc: Document, images_list, get_image_id):
         try:
             psn = paragraph.style.name
-            if psn.startswith('Heading'):
-                title = "".join(["#" for i in range(int(psn.replace("Heading ", '')))]) + " " + paragraph.text
+            if psn.startswith('Heading') or psn.startswith('TOC 标题') or psn.startswith('标题'):
+                title = "".join(["#" for i in range(
+                    int(psn.replace("Heading ", '').replace('TOC 标题', '').replace('标题',
+                                                                                    '')))]) + " " + paragraph.text
                 images = reduce(lambda x, y: [*x, *y],
                                 [get_paragraph_element_images(e, doc, images_list, get_image_id) for e in
                                  paragraph._element],
@@ -202,4 +204,4 @@ class DocSplitHandle(BaseSplitHandle):
             return content
         except BaseException as e:
             traceback.print_exception(e)
-            return f'{e}'
\ No newline at end of file
+            return f'{e}'