This commit is contained in:
liqiang-fit2cloud 2024-12-06 10:50:07 +08:00
commit 89e7581517

View File

@@ -6,6 +6,7 @@
@date: 2024/10/16 16:34
@desc:
"""
from functools import reduce
from typing import Dict, List
from langchain_community.embeddings import DashScopeEmbeddings
@@ -14,6 +15,12 @@ from langchain_community.embeddings.dashscope import embed_with_retry
from setting.models_provider.base_model_provider import MaxKBBaseModel
def proxy_embed_documents(texts: List[str], step_size, embed_documents):
    """Embed ``texts`` in consecutive batches and return one flat result list.

    Args:
        texts: The documents to embed.
        step_size: Maximum number of texts passed to ``embed_documents`` per
            call. Expected to be a positive integer; ``range`` raises
            ``ValueError`` when it is zero.
        embed_documents: Callable that takes a list of texts and returns an
            iterable with one embedding per text.

    Returns:
        A single list containing the results of every batch, concatenated in
        the same order as ``texts``. Empty when ``texts`` is empty.
    """
    embeddings = []
    for start_index in range(0, len(texts), step_size):
        # Extend in place: rebuilding the accumulated list for every batch
        # would copy all earlier embeddings again on each step.
        embeddings.extend(embed_documents(texts[start_index:start_index + step_size]))
    return embeddings
class AliyunBaiLianEmbedding(MaxKBBaseModel, DashScopeEmbeddings):
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -23,6 +30,11 @@ class AliyunBaiLianEmbedding(MaxKBBaseModel, DashScopeEmbeddings):
)
def embed_documents(self, texts: List[str]) -> List[List[float]]:
    """Embed ``texts``, batching the request for the ``text-embedding-v3`` model.

    Args:
        texts: The documents to embed.

    Returns:
        One embedding (a list of floats) per input text.
    """
    if self.model != 'text-embedding-v3':
        # Every other model is sent to the endpoint in a single call.
        return self._embed_documents(texts)
    # NOTE(review): the batch size of 6 is presumably a service-side limit
    # for text-embedding-v3 -- confirm against the DashScope documentation.
    return proxy_embed_documents(texts, 6, self._embed_documents)
def _embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Call out to DashScope's embedding endpoint for embedding search docs.
Args: