From 4c9756839a46f217739925afdbd184647d63e49e Mon Sep 17 00:00:00 2001
From: CaptainB <bin@fit2cloud.com>
Date: Thu, 10 Jul 2025 15:06:10 +0800
Subject: [PATCH] chore: normalize with_filter parameter to boolean in split
 handle files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

--bug=1057879 --user=刘瑞斌 【知识库】高级分段中自动清洗功能未生效 https://www.tapd.cn/62980211/s/1727744
---
 apps/common/handle/impl/text/doc_split_handle.py  | 2 ++
 apps/common/handle/impl/text/html_split_handle.py | 2 ++
 apps/common/handle/impl/text/pdf_split_handle.py  | 2 ++
 apps/common/handle/impl/text/text_split_handle.py | 2 ++
 apps/common/handle/impl/text/zip_split_handle.py  | 2 ++
 5 files changed, 10 insertions(+)

diff --git a/apps/common/handle/impl/text/doc_split_handle.py b/apps/common/handle/impl/text/doc_split_handle.py
index b1ed58177..8d3c74767 100644
--- a/apps/common/handle/impl/text/doc_split_handle.py
+++ b/apps/common/handle/impl/text/doc_split_handle.py
@@ -197,6 +197,8 @@ class DocSplitHandle(BaseSplitHandle):
         try:
             if type(limit) is str:
                 limit = int(limit)
+            if type(with_filter) is str:
+                with_filter = with_filter.lower() == 'true'
             image_list = []
             buffer = get_buffer(file)
             doc = Document(io.BytesIO(buffer))
diff --git a/apps/common/handle/impl/text/html_split_handle.py b/apps/common/handle/impl/text/html_split_handle.py
index ca3ad66ab..a82cfdaec 100644
--- a/apps/common/handle/impl/text/html_split_handle.py
+++ b/apps/common/handle/impl/text/html_split_handle.py
@@ -48,6 +48,8 @@ class HTMLSplitHandle(BaseSplitHandle):
         buffer = get_buffer(file)
         if type(limit) is str:
             limit = int(limit)
+        if type(with_filter) is str:
+            with_filter = with_filter.lower() == 'true'
         if pattern_list is not None and len(pattern_list) > 0:
             split_model = SplitModel(pattern_list, with_filter, limit)
         else:
diff --git a/apps/common/handle/impl/text/pdf_split_handle.py b/apps/common/handle/impl/text/pdf_split_handle.py
index f7e41eb2d..d666796b9 100644
--- a/apps/common/handle/impl/text/pdf_split_handle.py
+++ b/apps/common/handle/impl/text/pdf_split_handle.py
@@ -54,6 +54,8 @@ class PdfSplitHandle(BaseSplitHandle):
         try:
             if type(limit) is str:
                 limit = int(limit)
+            if type(with_filter) is str:
+                with_filter = with_filter.lower() == 'true'
             # 处理有目录的pdf
             result = self.handle_toc(pdf_document, limit)
             if result is not None:
diff --git a/apps/common/handle/impl/text/text_split_handle.py b/apps/common/handle/impl/text/text_split_handle.py
index 6eb40f6a1..fab396320 100644
--- a/apps/common/handle/impl/text/text_split_handle.py
+++ b/apps/common/handle/impl/text/text_split_handle.py
@@ -43,6 +43,8 @@ class TextSplitHandle(BaseSplitHandle):
         buffer = get_buffer(file)
         if type(limit) is str:
             limit = int(limit)
+        if type(with_filter) is str:
+            with_filter = with_filter.lower() == 'true'
         if pattern_list is not None and len(pattern_list) > 0:
             split_model = SplitModel(pattern_list, with_filter, limit)
         else:
diff --git a/apps/common/handle/impl/text/zip_split_handle.py b/apps/common/handle/impl/text/zip_split_handle.py
index d448f28cb..5752fe0d7 100644
--- a/apps/common/handle/impl/text/zip_split_handle.py
+++ b/apps/common/handle/impl/text/zip_split_handle.py
@@ -121,6 +121,8 @@ class ZipSplitHandle(BaseSplitHandle):
     def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
         if type(limit) is str:
             limit = int(limit)
+        if type(with_filter) is str:
+            with_filter = with_filter.lower() == 'true'
         buffer = get_buffer(file)
         bytes_io = io.BytesIO(buffer)
         result = []