feat: 支持百度千帆向量模型

This commit is contained in:
shaohuzhang1 2024-10-17 17:43:58 +08:00 committed by shaohuzhang1
parent f85ce4a745
commit d6915f84b9
3 changed files with 73 additions and 2 deletions

View File

@ -0,0 +1,42 @@
# coding=utf-8
"""
@project: MaxKB
@Author:
@file: embedding.py
@date: 2024/10/17 15:40
@desc: Credential form for the Baidu Qianfan embedding model.
"""
from typing import Dict
from common import forms
from common.exception.app_exception import AppApiException
from common.forms import BaseForm
from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
class QianfanEmbeddingCredential(BaseForm, BaseModelCredential):
    """Credential form (API key + secret key) for Qianfan embedding models."""

    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider,
                 raise_exception=False):
        """Check the credential by running a real embedding request.

        :param model_type: model type value; must match the ``'value'`` of one of
            the entries returned by ``provider.get_model_type_list()``.
        :param model_name: model name handed to ``provider.get_model``.
        :param model_credential: credential values to validate.
        :param provider: object supplying ``get_model_type_list`` and ``get_model``.
        :param raise_exception: when true, a failed probe raises instead of
            returning ``False``.
        :return: ``True`` if the probe succeeds, ``False`` if it fails and
            ``raise_exception`` is false.
        :raises AppApiException: if the model type is not supported (regardless of
            ``raise_exception``), if one is raised while probing, or if the probe
            fails and ``raise_exception`` is true.
        """
        matching_types = [mt for mt in provider.get_model_type_list() if mt.get('value') == model_type]
        if not any(matching_types):
            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')

        self.valid_form(model_credential)

        try:
            # Embed a short sample text so that bad credentials surface here.
            provider.get_model(model_type, model_name, model_credential).embed_query('你好')
        except AppApiException:
            # Application-level errors pass through untouched.
            raise
        except Exception as e:
            if not raise_exception:
                return False
            raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}')
        return True

    def encryption_dict(self, model: Dict[str, object]):
        """Return a copy of ``model`` whose ``'qianfan_sk'`` entry has been passed
        through the parent class's ``encryption`` (an absent key is treated as ``''``)."""
        protected_sk = super().encryption(model.get('qianfan_sk', ''))
        return {**model, 'qianfan_sk': protected_sk}

    # Required password-style input labelled 'API Key'.
    qianfan_ak = forms.PasswordInputField('API Key', required=True)
    # Required password-style input labelled "Secret Key".
    qianfan_sk = forms.PasswordInputField("Secret Key", required=True)

View File

@ -0,0 +1,23 @@
# coding=utf-8
"""
@project: MaxKB
@Author:
@file: embedding.py
@date: 2024/10/17 16:48
@desc: Baidu Qianfan embedding model wrapper.
"""
from typing import Dict
from langchain_community.embeddings import QianfanEmbeddingsEndpoint
from setting.models_provider.base_model_provider import MaxKBBaseModel
class QianfanEmbeddings(MaxKBBaseModel, QianfanEmbeddingsEndpoint):
    """Qianfan embeddings endpoint combined with the MaxKB base model class."""

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Create a ``QianfanEmbeddings`` from a model name and stored credentials.

        :param model_type: accepted but not used here.
        :param model_name: passed as the ``model`` argument.
        :param model_credential: mapping read for ``'qianfan_ak'`` and
            ``'qianfan_sk'``; a missing key yields ``None``.
        :param model_kwargs: accepted but not used here.
        :return: the new ``QianfanEmbeddings`` instance.
        """
        access_key = model_credential.get('qianfan_ak')
        secret_key = model_credential.get('qianfan_sk')
        return QianfanEmbeddings(model=model_name, qianfan_ak=access_key, qianfan_sk=secret_key)

View File

@ -11,11 +11,14 @@ import os
from common.util.file_util import get_file_content
from setting.models_provider.base_model_provider import ModelProvideInfo, ModelTypeConst, ModelInfo, IModelProvider, \
ModelInfoManage
from setting.models_provider.impl.wenxin_model_provider.credential.embedding import QianfanEmbeddingCredential
from setting.models_provider.impl.wenxin_model_provider.credential.llm import WenxinLLMModelCredential
from setting.models_provider.impl.wenxin_model_provider.model.embedding import QianfanEmbeddings
from setting.models_provider.impl.wenxin_model_provider.model.llm import QianfanChatModel
from smartdoc.conf import PROJECT_DIR
# Credential form instances, created once at import time and passed to the
# ModelInfo entries defined in this module (LLM and embedding respectively).
win_xin_llm_model_credential = WenxinLLMModelCredential()
qianfan_embedding_credential = QianfanEmbeddingCredential()
model_info_list = [ModelInfo('ERNIE-Bot-4',
'ERNIE-Bot-4是百度自行研发的大语言模型覆盖海量中文数据具有更强的对话问答、内容创作生成等能力。',
ModelTypeConst.LLM, win_xin_llm_model_credential, QianfanChatModel),
@ -41,13 +44,16 @@ model_info_list = [ModelInfo('ERNIE-Bot-4',
'千帆团队在Llama-2-7b基础上的中文增强版本在CMMLU、C-EVAL等中文知识库上表现优异。',
ModelTypeConst.LLM, win_xin_llm_model_credential, QianfanChatModel)
]
# ModelInfo entry for 'Embedding-V1': model name, description text,
# ModelTypeConst.EMBEDDING, the Qianfan embedding credential form instance and
# the QianfanEmbeddings model class.
embedding_model_info = ModelInfo('Embedding-V1',
'Embedding-V1是一个基于百度文心大模型技术的文本表示模型可以将文本转化为用数值表示的向量形式用于文本检索、信息推荐、知识挖掘等场景。 Embedding-V1提供了Embeddings接口可以根据输入内容生成对应的向量表示。您可以通过调用该接口将文本输入到模型中获取到对应的向量表示从而进行后续的文本处理和分析。',
ModelTypeConst.EMBEDDING, qianfan_embedding_credential, QianfanEmbeddings)
# Builds the provider's ModelInfoManage through a builder chain: the LLM entries
# from model_info_list, an 'ERNIE-Bot-4' default entry, then embedding_model_info
# appended and also passed to append_default_model_info.
# NOTE(review): the two consecutive lines starting with `QianfanChatModel))` look
# like the old and new versions of the same line from a diff view; presumably only
# the second (the one continuing with `.append_model_info(...)`) belongs in the
# file - confirm against the repository.
model_info_manage = ModelInfoManage.builder().append_model_info_list(model_info_list).append_default_model_info(
ModelInfo('ERNIE-Bot-4',
'ERNIE-Bot-4是百度自行研发的大语言模型覆盖海量中文数据具有更强的对话问答、内容创作生成等能力。',
ModelTypeConst.LLM,
win_xin_llm_model_credential,
QianfanChatModel)).build()
QianfanChatModel)).append_model_info(embedding_model_info).append_default_model_info(
embedding_model_info).build()
class WenxinModelProvider(IModelProvider):