From e24a2001c5a5a3f3cb9b6f776bfeafb8ed062e20 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Thu, 26 Jun 2025 16:23:32 +0800 Subject: [PATCH] feat: refine regex patterns in text_split_handle for improved comment detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1057526 --user=刘瑞斌 【知识库】markdown文件导入知识库,分段详情中代码块展示异常 https://www.tapd.cn/62980211/s/1719131 --- .../handle/impl/text/text_split_handle.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/apps/common/handle/impl/text/text_split_handle.py b/apps/common/handle/impl/text/text_split_handle.py index b5e6ded3c..d412e1ce0 100644 --- a/apps/common/handle/impl/text/text_split_handle.py +++ b/apps/common/handle/impl/text/text_split_handle.py @@ -15,12 +15,14 @@ from charset_normalizer import detect from common.handle.base_split_handle import BaseSplitHandle from common.utils.split_model import SplitModel -default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), - re.compile('(?<=\\n)(?