# coding=utf-8
"""
    @project: MaxKB
    @Author:虎
    @file: embedding.py
    @date:2024/10/17 15:40
    @desc: Credential form and validation for the Qianfan embedding model.
"""
from typing import Dict

from common import forms
from common.exception.app_exception import AppApiException
from common.forms import BaseForm
from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode


class QianfanEmbeddingCredential(BaseForm, BaseModelCredential):
    """Credential form for the Qianfan embedding model.

    Declares the two credential fields (``qianfan_ak`` and ``qianfan_sk``)
    and validates them by building the model through the provider and
    issuing one probe embedding request.
    """

    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider,
                 raise_exception=False) -> bool:
        """Check that the credential can be used with the given model.

        :param model_type: model type value to look up in the provider's
            model type list.
        :param model_name: model name passed through to ``provider.get_model``.
        :param model_credential: credential values; validated with
            ``self.valid_form`` before the model is built.
        :param provider: object exposing ``get_model_type_list()`` and
            ``get_model(model_type, model_name, model_credential)``.
        :param raise_exception: when true, a failed probe raises
            ``AppApiException`` instead of returning ``False``.
        :return: ``True`` when the probe request succeeds, ``False`` when it
            fails and ``raise_exception`` is false.
        :raises AppApiException: when ``model_type`` is not offered by the
            provider (regardless of ``raise_exception``), when the probe
            itself raises ``AppApiException``, or when the probe fails and
            ``raise_exception`` is true.
        """
        model_type_list = provider.get_model_type_list()
        # Test the comparison result itself rather than the truthiness of the
        # matched entries, and avoid building an intermediate list.
        if not any(mt.get('value') == model_type for mt in model_type_list):
            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
        self.valid_form(model_credential)
        try:
            model = provider.get_model(model_type, model_name, model_credential)
            # Probe call: any exception here is treated as an invalid credential.
            model.embed_query('你好')
        except AppApiException:
            # Application-level errors are propagated unchanged.
            raise
        except Exception as e:
            if raise_exception:
                # Chain the original error so the root cause stays in the traceback.
                raise AppApiException(ValidCode.valid_error.value,
                                      f'校验失败,请检查参数是否正确: {str(e)}') from e
            return False
        return True

    def encryption_dict(self, model: Dict[str, object]) -> Dict[str, object]:
        """Return a copy of ``model`` with the secret key passed through ``encryption``.

        Only ``qianfan_sk`` is replaced; every other key is copied as is.
        A missing ``qianfan_sk`` is treated as an empty string.

        :param model: credential mapping.
        :return: new dict with ``qianfan_sk`` set to the result of the
            inherited ``encryption`` helper.
        """
        return {**model, 'qianfan_sk': super().encryption(model.get('qianfan_sk', ''))}

    # Form field for the API key.
    qianfan_ak = forms.PasswordInputField('API Key', required=True)

    # Form field for the secret key.
    qianfan_sk = forms.PasswordInputField("Secret Key", required=True)
# coding=utf-8
"""
    @project: MaxKB
    @Author:虎
    @file: embedding.py
    @date:2024/10/17 16:48
    @desc: Qianfan embedding model wrapper.
"""
from typing import Dict

from langchain_community.embeddings import QianfanEmbeddingsEndpoint

from setting.models_provider.base_model_provider import MaxKBBaseModel


class QianfanEmbeddings(MaxKBBaseModel, QianfanEmbeddingsEndpoint):
    """Embedding model combining ``MaxKBBaseModel`` with ``QianfanEmbeddingsEndpoint``."""

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Build a ``QianfanEmbeddings`` from a model name and credential mapping.

        :param model_type: accepted for interface compatibility; not used here.
        :param model_name: forwarded as the ``model`` constructor argument.
        :param model_credential: mapping read for ``qianfan_ak`` and
            ``qianfan_sk``; a missing key yields ``None``.
        :param model_kwargs: accepted but not forwarded to the constructor.
        :return: a new ``QianfanEmbeddings`` instance.
        """
        init_kwargs = {
            'model': model_name,
            'qianfan_ak': model_credential.get('qianfan_ak'),
            'qianfan_sk': model_credential.get('qianfan_sk'),
        }
        return QianfanEmbeddings(**init_kwargs)
win_xin_llm_model_credential, QianfanChatModel), @@ -41,13 +44,16 @@ model_info_list = [ModelInfo('ERNIE-Bot-4', '千帆团队在Llama-2-7b基础上的中文增强版本,在CMMLU、C-EVAL等中文知识库上表现优异。', ModelTypeConst.LLM, win_xin_llm_model_credential, QianfanChatModel) ] - +embedding_model_info = ModelInfo('Embedding-V1', + 'Embedding-V1是一个基于百度文心大模型技术的文本表示模型,可以将文本转化为用数值表示的向量形式,用于文本检索、信息推荐、知识挖掘等场景。 Embedding-V1提供了Embeddings接口,可以根据输入内容生成对应的向量表示。您可以通过调用该接口,将文本输入到模型中,获取到对应的向量表示,从而进行后续的文本处理和分析。', + ModelTypeConst.EMBEDDING, qianfan_embedding_credential, QianfanEmbeddings) model_info_manage = ModelInfoManage.builder().append_model_info_list(model_info_list).append_default_model_info( ModelInfo('ERNIE-Bot-4', 'ERNIE-Bot-4是百度自行研发的大语言模型,覆盖海量中文数据,具有更强的对话问答、内容创作生成等能力。', ModelTypeConst.LLM, win_xin_llm_model_credential, - QianfanChatModel)).build() + QianfanChatModel)).append_model_info(embedding_model_info).append_default_model_info( + embedding_model_info).build() class WenxinModelProvider(IModelProvider):