chore: normalize with_filter parameter to boolean in split handle files
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run

--bug=1057879 --user=刘瑞斌 【知识库】高级分段中自动清洗功能未生效 https://www.tapd.cn/62980211/s/1727744
This commit is contained in:
CaptainB 2025-07-10 15:06:10 +08:00
parent fe78de5d3c
commit 4c9756839a
5 changed files with 10 additions and 0 deletions

View File

@@ -197,6 +197,8 @@ class DocSplitHandle(BaseSplitHandle):
try:
if type(limit) is str:
limit = int(limit)
if type(with_filter) is str:
with_filter = with_filter.lower() == 'true'
image_list = []
buffer = get_buffer(file)
doc = Document(io.BytesIO(buffer))

View File

@@ -48,6 +48,8 @@ class HTMLSplitHandle(BaseSplitHandle):
buffer = get_buffer(file)
if type(limit) is str:
limit = int(limit)
if type(with_filter) is str:
with_filter = with_filter.lower() == 'true'
if pattern_list is not None and len(pattern_list) > 0:
split_model = SplitModel(pattern_list, with_filter, limit)
else:

View File

@@ -54,6 +54,8 @@ class PdfSplitHandle(BaseSplitHandle):
try:
if type(limit) is str:
limit = int(limit)
if type(with_filter) is str:
with_filter = with_filter.lower() == 'true'
# 处理有目录的pdf
result = self.handle_toc(pdf_document, limit)
if result is not None:

View File

@@ -43,6 +43,8 @@ class TextSplitHandle(BaseSplitHandle):
buffer = get_buffer(file)
if type(limit) is str:
limit = int(limit)
if type(with_filter) is str:
with_filter = with_filter.lower() == 'true'
if pattern_list is not None and len(pattern_list) > 0:
split_model = SplitModel(pattern_list, with_filter, limit)
else:

View File

@@ -121,6 +121,8 @@ class ZipSplitHandle(BaseSplitHandle):
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
if type(limit) is str:
limit = int(limit)
if type(with_filter) is str:
with_filter = with_filter.lower() == 'true'
buffer = get_buffer(file)
bytes_io = io.BytesIO(buffer)
result = []