From 7c529c281c39caa125dc9a468f4cfae5238311f5 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Thu, 5 Dec 2024 17:50:50 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E9=98=BF=E9=87=8C?= =?UTF-8?q?=E7=99=BE=E7=82=BC=E9=A1=B9=E7=9B=AE=E6=A8=A1=E5=9E=8Bv3?= =?UTF-8?q?=E8=B0=83=E7=94=A8=E6=96=B9=E5=BC=8F=20(#1773)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../model/embedding.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/embedding.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/embedding.py index e209e770b..401d12ee9 100644 --- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/embedding.py +++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/embedding.py @@ -6,6 +6,7 @@ @date:2024/10/16 16:34 @desc: """ +from functools import reduce from typing import Dict, List from langchain_community.embeddings import DashScopeEmbeddings @@ -14,6 +15,12 @@ from langchain_community.embeddings.dashscope import embed_with_retry from setting.models_provider.base_model_provider import MaxKBBaseModel +def proxy_embed_documents(texts: List[str], step_size, embed_documents): + value = [embed_documents(texts[start_index:start_index + step_size]) for start_index in + range(0, len(texts), step_size)] + return reduce(lambda x, y: [*x, *y], value, []) + + class AliyunBaiLianEmbedding(MaxKBBaseModel, DashScopeEmbeddings): @staticmethod def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): @@ -23,6 +30,11 @@ class AliyunBaiLianEmbedding(MaxKBBaseModel, DashScopeEmbeddings): ) def embed_documents(self, texts: List[str]) -> List[List[float]]: + if self.model == 'text-embedding-v3': + return proxy_embed_documents(texts, 6, self._embed_documents) + return self._embed_documents(texts) + + def _embed_documents(self, texts: List[str]) -> List[List[float]]: """Call out to DashScope's embedding endpoint for embedding search docs. Args: