From 80415c937261374752514807e4057a0585baed01 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Mon, 28 Oct 2024 19:05:21 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93=E6=96=87=E6=A1=A3=E4=B8=AD=E8=8B=A5=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E4=B8=8D=E5=AD=98=E5=9C=A8=EF=BC=8C=E5=89=8D=E7=AB=AF=E5=AF=B9?= =?UTF-8?q?=E8=AF=9D=E9=97=AA=E7=83=81=E9=97=AE=E9=A2=98=20(#1483)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/common/chunk/impl/mark_chunk_handle.py | 11 +++++-- .../tencent_model_provider/model/embedding.py | 2 +- ui/src/components/markdown/MdRenderer.vue | 30 ++++++++++++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/apps/common/chunk/impl/mark_chunk_handle.py b/apps/common/chunk/impl/mark_chunk_handle.py index 4f8623c9c..5bca2f445 100644 --- a/apps/common/chunk/impl/mark_chunk_handle.py +++ b/apps/common/chunk/impl/mark_chunk_handle.py @@ -22,14 +22,19 @@ class MarkChunkHandle(IChunkHandle): for chunk in chunk_list: chunk_result = re.findall(split_chunk_pattern, chunk, flags=re.DOTALL) for c_r in chunk_result: - result.append(c_r) + if len(c_r.strip()) > 0: + result.append(c_r.strip()) + other_chunk_list = re.split(split_chunk_pattern, chunk, flags=re.DOTALL) for other_chunk in other_chunk_list: if len(other_chunk) > 0: if len(other_chunk) < max_chunk_len: - result.append(other_chunk) + if len(other_chunk.strip()) > 0: + result.append(other_chunk.strip()) else: max_chunk_list = re.findall(max_chunk_pattern, other_chunk, flags=re.DOTALL) for m_c in max_chunk_list: - result.append(m_c) + if len(m_c.strip()) > 0: + result.append(m_c.strip()) + return result diff --git a/apps/setting/models_provider/impl/tencent_model_provider/model/embedding.py b/apps/setting/models_provider/impl/tencent_model_provider/model/embedding.py index c9ad57e26..659a5ac12 100644 --- a/apps/setting/models_provider/impl/tencent_model_provider/model/embedding.py +++ b/apps/setting/models_provider/impl/tencent_model_provider/model/embedding.py @@ -17,7 +17,7 @@ class TencentEmbeddingModel(MaxKBBaseModel, Embeddings): request = GetEmbeddingRequest() request.Input = text res = self.client.GetEmbedding(request) - return res.Data + return res.Data[0].Embedding def __init__(self, secret_id: str, secret_key: str, model_name: str): self.secret_id = secret_id diff --git a/ui/src/components/markdown/MdRenderer.vue b/ui/src/components/markdown/MdRenderer.vue index 76ab43fa4..84861afe3 100644 --- a/ui/src/components/markdown/MdRenderer.vue +++ b/ui/src/components/markdown/MdRenderer.vue @@ -68,9 +68,37 @@ const md_view_list = computed(() => { return md_img_list[Math.floor(index / 2)] } }) - return split_echarts_rander(split_html_rander(split_quick_question(result))) + return split_echarts_rander(split_html_rander(split_quick_question(split_md_img(result)))) }) +const split_md_img = (result: Array) => { + return result + .map((item) => split_md_img_(item)) + .reduce((x: any, y: any) => { + return [...x, ...y] + }, []) +} +const split_md_img_ = (source: string) => { + const temp_md_img_list = source.match(/(!\[.*?\]\(.*?\){.*?})|(!\[.*?\]\(.*?\))/g) + console.log(temp_md_img_list) + const md_img_list = temp_md_img_list ? temp_md_img_list.filter((i) => i) : [] + const split_img_value = source + .split(/(!\[.*?\]\(.*?\){.*?})|(!\[.*?\]\(.*?\))/g) + .filter((item) => item !== undefined) + .filter((item) => !md_img_list?.includes(item)) + const result = Array.from( + { length: md_img_list.length + split_img_value.length }, + (v, i) => i + ).map((index) => { + if (index % 2 == 0) { + return split_img_value[Math.floor(index / 2)] + } else { + return md_img_list[Math.floor(index / 2)] + } + }) + console.log(result) + return result +} const split_quick_question = (result: Array) => { return result .map((item) => split_quick_question_(item))