From a827396d07e62195729c6292f48b0440961f14ab Mon Sep 17 00:00:00 2001 From: CaptainB Date: Tue, 17 Dec 2024 12:50:24 +0800 Subject: [PATCH 1/6] feat: Gemini Image understand model --- .../gemini_model_provider/credential/image.py | 64 +++++++++++++++++++ .../gemini_model_provider.py | 43 ++++++++++--- .../impl/gemini_model_provider/model/image.py | 24 +++++++ 3 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 apps/setting/models_provider/impl/gemini_model_provider/credential/image.py create mode 100644 apps/setting/models_provider/impl/gemini_model_provider/model/image.py diff --git a/apps/setting/models_provider/impl/gemini_model_provider/credential/image.py b/apps/setting/models_provider/impl/gemini_model_provider/credential/image.py new file mode 100644 index 000000000..33cc60bbd --- /dev/null +++ b/apps/setting/models_provider/impl/gemini_model_provider/credential/image.py @@ -0,0 +1,64 @@ +# coding=utf-8 +import base64 +import os +from typing import Dict + +from langchain_core.messages import HumanMessage + +from common import forms +from common.exception.app_exception import AppApiException +from common.forms import BaseForm, TooltipLabel +from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode + +class GeminiImageModelParams(BaseForm): + temperature = forms.SliderField(TooltipLabel('温度', '较高的数值会使输出更加随机,而较低的数值会使其更加集中和确定'), + required=True, default_value=0.7, + _min=0.1, + _max=1.0, + _step=0.01, + precision=2) + + max_tokens = forms.SliderField( + TooltipLabel('输出最大Tokens', '指定模型可生成的最大token个数'), + required=True, default_value=800, + _min=1, + _max=100000, + _step=1, + precision=0) + + + +class GeminiImageModelCredential(BaseForm, BaseModelCredential): + api_key = forms.PasswordInputField('API Key', required=True) + + def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider, + raise_exception=False): + model_type_list = provider.get_model_type_list() + if not 
any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))): + raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持') + + for key in ['api_key']: + if key not in model_credential: + if raise_exception: + raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段') + else: + return False + try: + model = provider.get_model(model_type, model_name, model_credential) + res = model.stream([HumanMessage(content=[{"type": "text", "text": "你好"}])]) + for chunk in res: + print(chunk) + except Exception as e: + if isinstance(e, AppApiException): + raise e + if raise_exception: + raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') + else: + return False + return True + + def encryption_dict(self, model: Dict[str, object]): + return {**model, 'api_key': super().encryption(model.get('api_key', ''))} + + def get_model_params_setting_form(self, model_name): + return GeminiImageModelParams() diff --git a/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py b/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py index b6dd442ca..a9acd40cf 100644 --- a/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py +++ b/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py @@ -11,24 +11,47 @@ import os from common.util.file_util import get_file_content from setting.models_provider.base_model_provider import IModelProvider, ModelProvideInfo, ModelInfo, ModelTypeConst, \ ModelInfoManage +from setting.models_provider.impl.gemini_model_provider.credential.image import GeminiImageModelCredential from setting.models_provider.impl.gemini_model_provider.credential.llm import GeminiLLMModelCredential +from setting.models_provider.impl.gemini_model_provider.model.image import GeminiImage from setting.models_provider.impl.gemini_model_provider.model.llm import GeminiChatModel from smartdoc.conf import PROJECT_DIR 
gemini_llm_model_credential = GeminiLLMModelCredential() +gemini_image_model_credential = GeminiImageModelCredential() -gemini_1_pro = ModelInfo('gemini-1.0-pro', '最新的Gemini 1.0 Pro模型,随Google更新而更新', - ModelTypeConst.LLM, - gemini_llm_model_credential, - GeminiChatModel) +model_info_list = [ + ModelInfo('gemini-1.0-pro', '最新的Gemini 1.0 Pro模型,随Google更新而更新', + ModelTypeConst.LLM, + gemini_llm_model_credential, + GeminiChatModel), + ModelInfo('gemini-1.0-pro-vision', '最新的Gemini 1.0 Pro Vision模型,随Google更新而更新', + ModelTypeConst.LLM, + gemini_llm_model_credential, + GeminiChatModel), +] -gemini_1_pro_vision = ModelInfo('gemini-1.0-pro-vision', '最新的Gemini 1.0 Pro Vision模型,随Google更新而更新', - ModelTypeConst.LLM, - gemini_llm_model_credential, - GeminiChatModel) +model_image_info_list = [ + ModelInfo('gemini-1.5-flash', '最新的Gemini 1.5 Flash模型,随Google更新而更新', + ModelTypeConst.IMAGE, + gemini_image_model_credential, + GeminiImage), + ModelInfo('gemini-1.5-pro', '最新的Gemini 1.5 Flash模型,随Google更新而更新', + ModelTypeConst.IMAGE, + gemini_image_model_credential, + GeminiImage), +] -model_info_manage = ModelInfoManage.builder().append_model_info(gemini_1_pro).append_model_info( - gemini_1_pro_vision).append_default_model_info(gemini_1_pro).build() + + +model_info_manage = ( + ModelInfoManage.builder() + .append_model_info_list(model_info_list) + .append_model_info_list(model_image_info_list) + .append_default_model_info(model_info_list[0]) + .append_default_model_info(model_image_info_list[0]) + .build() +) class GeminiModelProvider(IModelProvider): diff --git a/apps/setting/models_provider/impl/gemini_model_provider/model/image.py b/apps/setting/models_provider/impl/gemini_model_provider/model/image.py new file mode 100644 index 000000000..2e48a81b2 --- /dev/null +++ b/apps/setting/models_provider/impl/gemini_model_provider/model/image.py @@ -0,0 +1,24 @@ +from typing import Dict + +from langchain_google_genai import ChatGoogleGenerativeAI + +from common.config.tokenizer_manage_config 
import TokenizerManage +from setting.models_provider.base_model_provider import MaxKBBaseModel + + +def custom_get_token_ids(text: str): + tokenizer = TokenizerManage.get_tokenizer() + return tokenizer.encode(text) + + +class GeminiImage(MaxKBBaseModel, ChatGoogleGenerativeAI): + + @staticmethod + def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): + optional_params = MaxKBBaseModel.filter_optional_params(model_kwargs) + return GeminiImage( + model=model_name, + google_api_key=model_credential.get('api_key'), + streaming=True, + **optional_params, + ) From 28acc2e48bdd22c0266328825a0709f116337b1e Mon Sep 17 00:00:00 2001 From: wangdan-fit2cloud Date: Tue, 17 Dec 2024 14:37:00 +0800 Subject: [PATCH 2/6] style: update audio icon --- ui/src/assets/icon_file-audio.svg | 5 +++++ .../component/FileUploadSettingDialog.vue | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 ui/src/assets/icon_file-audio.svg diff --git a/ui/src/assets/icon_file-audio.svg b/ui/src/assets/icon_file-audio.svg new file mode 100644 index 000000000..13f1f7216 --- /dev/null +++ b/ui/src/assets/icon_file-audio.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/ui/src/workflow/nodes/base-node/component/FileUploadSettingDialog.vue b/ui/src/workflow/nodes/base-node/component/FileUploadSettingDialog.vue index 05c67b9f8..ddbb1a007 100644 --- a/ui/src/workflow/nodes/base-node/component/FileUploadSettingDialog.vue +++ b/ui/src/workflow/nodes/base-node/component/FileUploadSettingDialog.vue @@ -39,6 +39,7 @@ class="card-checkbox cursor w-full mb-8" :class="form_data.document ? 'active' : ''" style="--el-card-padding: 8px 16px" + @click.stop="form_data.document = !form_data.document" >
@@ -48,7 +49,10 @@ 需要使用“文档内容提取”节点解析文档内容
- +
@@ -65,24 +70,26 @@ 需要使用“图片理解”节点解析图片内容
- +
+
- +

音频(MP3)

- 所选模型支持接收音频或与语音转文本节点配合使用 + 需要使用“语音转文本”节点解析音频内容
- +
From c58514765d1556672900fb6132ad488b2bcad5ba Mon Sep 17 00:00:00 2001 From: wangdan-fit2cloud Date: Tue, 17 Dec 2024 14:51:45 +0800 Subject: [PATCH 3/6] style: update icon --- ui/src/assets/icon_text-image.svg | 6 ++++++ ui/src/workflow/common/data.ts | 16 ++++++++-------- .../workflow/icons/image-generate-node-icon.vue | 4 ++-- 3 files changed, 16 insertions(+), 10 deletions(-) create mode 100644 ui/src/assets/icon_text-image.svg diff --git a/ui/src/assets/icon_text-image.svg b/ui/src/assets/icon_text-image.svg new file mode 100644 index 000000000..12b6b6145 --- /dev/null +++ b/ui/src/assets/icon_text-image.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/ui/src/workflow/common/data.ts b/ui/src/workflow/common/data.ts index 9e10784bd..21af2e04c 100644 --- a/ui/src/workflow/common/data.ts +++ b/ui/src/workflow/common/data.ts @@ -270,7 +270,7 @@ export const speechToTextNode = { } export const textToSpeechNode = { type: WorkflowType.TextToSpeechNode, - text: '将文本通过语音合成模型转换为音频文件', + text: '将文本通过语音合成模型转换为音频', label: '文本转语音', height: 252, properties: { @@ -287,17 +287,17 @@ export const textToSpeechNode = { } export const menuNodes = [ aiChatNode, + imageUnderstandNode, + imageGenerateNode, searchDatasetNode, - questionNode, + rerankerNode, conditionNode, replyNode, - rerankerNode, - documentExtractNode, - imageUnderstandNode, formNode, + questionNode, + documentExtractNode, speechToTextNode, - textToSpeechNode, - imageGenerateNode + textToSpeechNode ] /** @@ -390,7 +390,7 @@ export const nodeDict: any = { [WorkflowType.ImageUnderstandNode]: imageUnderstandNode, [WorkflowType.TextToSpeechNode]: textToSpeechNode, [WorkflowType.SpeechToTextNode]: speechToTextNode, - [WorkflowType.ImageGenerateNode]: imageGenerateNode + [WorkflowType.ImageGenerateNode]: imageGenerateNode } export function isWorkFlow(type: string | undefined) { return type === 'WORK_FLOW' diff --git a/ui/src/workflow/icons/image-generate-node-icon.vue b/ui/src/workflow/icons/image-generate-node-icon.vue index 
6f58417d2..64ca192b1 100644 --- a/ui/src/workflow/icons/image-generate-node-icon.vue +++ b/ui/src/workflow/icons/image-generate-node-icon.vue @@ -1,6 +1,6 @@ From b9013d72adf960d6864ab2efe46734ee97214408 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Tue, 17 Dec 2024 15:21:52 +0800 Subject: [PATCH 4/6] feat: Volcanic Engine Access Key ID and Secret Access Key --- .../impl/volcanic_engine_model_provider/credential/tti.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tti.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tti.py index 3c980778d..9e6a7967d 100644 --- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tti.py +++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tti.py @@ -28,8 +28,8 @@ class VolcanicEngineTTIModelGeneralParams(BaseForm): class VolcanicEngineTTIModelCredential(BaseForm, BaseModelCredential): - access_key = forms.PasswordInputField('Access Key', required=True) - secret_key = forms.PasswordInputField('Secret Key', required=True) + access_key = forms.PasswordInputField('Access Key ID', required=True) + secret_key = forms.PasswordInputField('Secret Access Key', required=True) def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider, raise_exception=False): From 26ba8938778842f91c90d4920b65c0303b9e7d65 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Tue, 17 Dec 2024 15:38:52 +0800 Subject: [PATCH 5/6] fix: The basic information node repeatedly turns on and off the file upload switch, and the audio variable of the starting node will increase repeatedly. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1050529 --user=刘瑞斌 【应用】-高级编排应用设置,基础信息节点反复开启关闭文件上传开关,开始节点的音频变量会重复增加 https://www.tapd.cn/57709429/s/1632488 --- ui/src/workflow/nodes/start-node/index.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/src/workflow/nodes/start-node/index.vue b/ui/src/workflow/nodes/start-node/index.vue index 8bec6ce39..20fa97417 100644 --- a/ui/src/workflow/nodes/start-node/index.vue +++ b/ui/src/workflow/nodes/start-node/index.vue @@ -69,7 +69,7 @@ const refreshFileUploadConfig = () => { .map((v: any) => cloneDeep(v.properties.node_data.file_upload_setting)) .filter((v: any) => v) - fields = fields.filter((item: any) => item.value !== 'image' && item.value !== 'document') + fields = fields.filter((item: any) => item.value !== 'image' && item.value !== 'document' && item.value !== 'audio' && item.value !== 'video') if (form_data.length === 0) { set(props.nodeModel.properties.config, 'fields', fields) From d1135dc7bcd0f064974911c265bf582ee9837dc9 Mon Sep 17 00:00:00 2001 From: wangdan-fit2cloud Date: Tue, 17 Dec 2024 16:02:56 +0800 Subject: [PATCH 6/6] style: Optimize style --- .../ai-chat/ExecutionDetailDialog.vue | 35 ------------------- .../ai-chat/ParagraphSourceDialog.vue | 2 +- ui/src/components/card-add/index.vue | 2 +- .../items/complex/ArrayObjectCard.vue | 2 +- ui/src/styles/element-plus.scss | 6 ++++ .../component/XPackDisplaySettingDialog.vue | 7 +--- ui/src/views/application-workflow/index.vue | 7 +--- ui/src/views/log/index.vue | 4 +-- ui/src/views/template/index.vue | 4 ++- ui/src/views/theme/LoginPreview.vue | 2 +- 10 files changed, 17 insertions(+), 54 deletions(-) diff --git a/ui/src/components/ai-chat/ExecutionDetailDialog.vue b/ui/src/components/ai-chat/ExecutionDetailDialog.vue index cf31e2be8..f0bd346f6 100644 --- a/ui/src/components/ai-chat/ExecutionDetailDialog.vue +++ b/ui/src/components/ai-chat/ExecutionDetailDialog.vue @@ -526,41 +526,6 @@