def proxy_embed_documents(texts: List[str], step_size, embed_documents):
    """Embed *texts* in consecutive batches and return the merged result.

    The input is cut into slices of at most ``step_size`` items,
    ``embed_documents`` is called once per slice, and the per-slice results
    are concatenated in input order.

    Args:
        texts: The documents to embed.
        step_size: Maximum number of texts passed to ``embed_documents`` in a
            single call. Must be a positive integer.
        embed_documents: Callable that takes a list of texts and returns an
            iterable with one embedding per text.

    Returns:
        A flat list holding the embeddings of every batch, in input order.
        An empty ``texts`` yields an empty list without calling
        ``embed_documents``.

    Raises:
        ValueError: If ``step_size`` is smaller than 1.
    """
    # A negative step would make ``range`` empty and silently drop every
    # text; a zero step makes ``range`` raise anyway. Reject both up front
    # with one explicit error.
    if step_size < 1:
        raise ValueError(f"step_size must be a positive integer, got {step_size!r}")

    embeddings = []
    for start_index in range(0, len(texts), step_size):
        batch = texts[start_index:start_index + step_size]
        # ``extend`` appends in place, so merging stays linear in the number
        # of embeddings instead of re-copying the accumulated list per batch.
        embeddings.extend(embed_documents(batch))
    return embeddings