mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 09:54:54 +00:00
chore: normalize with_filter parameter to boolean in split handle files
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
--bug=1057879 --user=刘瑞斌 【知识库】高级分段中自动清洗功能未生效 https://www.tapd.cn/62980211/s/1727744
This commit is contained in:
parent
fe78de5d3c
commit
4c9756839a
|
|
@@ -197,6 +197,8 @@ class DocSplitHandle(BaseSplitHandle):
|
|||
try:
|
||||
if type(limit) is str:
|
||||
limit = int(limit)
|
||||
if type(with_filter) is str:
|
||||
with_filter = with_filter.lower() == 'true'
|
||||
image_list = []
|
||||
buffer = get_buffer(file)
|
||||
doc = Document(io.BytesIO(buffer))
|
||||
|
|
|
|||
|
|
@@ -48,6 +48,8 @@ class HTMLSplitHandle(BaseSplitHandle):
|
|||
buffer = get_buffer(file)
|
||||
if type(limit) is str:
|
||||
limit = int(limit)
|
||||
if type(with_filter) is str:
|
||||
with_filter = with_filter.lower() == 'true'
|
||||
if pattern_list is not None and len(pattern_list) > 0:
|
||||
split_model = SplitModel(pattern_list, with_filter, limit)
|
||||
else:
|
||||
|
|
|
|||
|
|
@@ -54,6 +54,8 @@ class PdfSplitHandle(BaseSplitHandle):
|
|||
try:
|
||||
if type(limit) is str:
|
||||
limit = int(limit)
|
||||
if type(with_filter) is str:
|
||||
with_filter = with_filter.lower() == 'true'
|
||||
# 处理有目录的pdf
|
||||
result = self.handle_toc(pdf_document, limit)
|
||||
if result is not None:
|
||||
|
|
|
|||
|
|
@@ -43,6 +43,8 @@ class TextSplitHandle(BaseSplitHandle):
|
|||
buffer = get_buffer(file)
|
||||
if type(limit) is str:
|
||||
limit = int(limit)
|
||||
if type(with_filter) is str:
|
||||
with_filter = with_filter.lower() == 'true'
|
||||
if pattern_list is not None and len(pattern_list) > 0:
|
||||
split_model = SplitModel(pattern_list, with_filter, limit)
|
||||
else:
|
||||
|
|
|
|||
|
|
@@ -121,6 +121,8 @@ class ZipSplitHandle(BaseSplitHandle):
|
|||
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
|
||||
if type(limit) is str:
|
||||
limit = int(limit)
|
||||
if type(with_filter) is str:
|
||||
with_filter = with_filter.lower() == 'true'
|
||||
buffer = get_buffer(file)
|
||||
bytes_io = io.BytesIO(buffer)
|
||||
result = []
|
||||
|
|
|
|||
Loading…
Reference in New Issue