refactor: 应用设置支持配置TTS参数

This commit is contained in:
CaptainB 2024-10-16 12:53:48 +08:00 committed by 刘瑞斌
parent 084f8503e7
commit 04ef6b53d1
17 changed files with 149 additions and 37 deletions

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.15 on 2024-10-16 13:10
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``tts_model_params_setting`` JSON column to the ``application`` model."""

    dependencies = [
        ('application', '0016_alter_chatrecord_problem_text'),
    ]

    operations = [
        migrations.AddField(
            model_name='application',
            name='tts_model_params_setting',
            # Pass the ``dict`` callable instead of a ``{}`` literal: a literal
            # is a single mutable object reused as the default everywhere,
            # which Django reports as check fields.E010 for JSONField.
            field=models.JSONField(default=dict, verbose_name='模型参数相关设置'),
        ),
    ]

View File

@ -49,6 +49,7 @@ class Application(AppModelMixin):
dataset_setting = models.JSONField(verbose_name="数据集参数设置", default=get_dataset_setting_dict)
model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
# Use the ``dict`` callable (as ``work_flow`` does) rather than a ``{}`` literal:
# a literal default is one mutable object shared by every instance that falls
# back to it, and Django flags it with check fields.E010.
model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
icon = models.CharField(max_length=256, verbose_name="应用icon", default="/ui/favicon.ico")
work_flow = models.JSONField(verbose_name="工作流数据", default=dict)

View File

@ -810,7 +810,7 @@ class ApplicationSerializer(serializers.Serializer):
update_keys = ['name', 'desc', 'model_id', 'multiple_rounds_dialogue', 'prologue', 'status',
'dataset_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type',
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting',
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting','tts_model_params_setting',
'problem_optimization_prompt']
for update_key in update_keys:
if update_key in instance and instance.get(update_key) is not None:
@ -932,6 +932,8 @@ class ApplicationSerializer(serializers.Serializer):
instance['tts_model_enable'] = node_data['tts_model_enable']
if 'tts_type' in node_data:
instance['tts_type'] = node_data['tts_type']
if 'tts_model_params_setting' in node_data:
instance['tts_model_params_setting'] = node_data['tts_model_params_setting']
break
def speech_to_text(self, file, with_valid=True):
@ -950,7 +952,7 @@ class ApplicationSerializer(serializers.Serializer):
application_id = self.data.get('application_id')
application = QuerySet(Application).filter(id=application_id).first()
if application.tts_model_enable:
model = get_model_instance_by_model_user_id(application.tts_model_id, application.user_id)
model = get_model_instance_by_model_user_id(application.tts_model_id, application.user_id, **application.tts_model_params_setting)
return model.text_to_speech(text)
class ApplicationKeySerializerModel(serializers.ModelSerializer):

View File

@ -0,0 +1,25 @@
# Generated by Django 4.2.15 on 2024-10-16 13:10
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the ``ModelParam`` table in the ``setting`` app."""

    dependencies = [('setting', '0007_model_model_params_form')]

    operations = [
        migrations.CreateModel(
            name='ModelParam',
            fields=[
                # Auto-incrementing surrogate primary key.
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('label', models.CharField(max_length=128, verbose_name='参数')),
                ('field', models.CharField(max_length=256, verbose_name='显示名称')),
                (
                    'default_value',
                    models.CharField(max_length=1000, verbose_name='默认值'),
                ),
                ('input_type', models.CharField(max_length=32, verbose_name='组件类型')),
                ('attrs', models.JSONField(verbose_name='属性')),
                ('required', models.BooleanField(verbose_name='必填')),
            ],
        ),
    ]

View File

@ -12,7 +12,7 @@ class AliyunBaiLianTTSModelGeneralParams(BaseForm):
voice = forms.SingleSelect(
TooltipLabel('音色', '中文音色可支持中英文混合场景'),
required=True, default_value='longxiaochun',
text_field='text',
text_field='value',
value_field='value',
option_list=[
{'text': '龙小淳', 'value': 'longxiaochun'},

View File

@ -10,19 +10,23 @@ from setting.models_provider.impl.base_tts import BaseTextToSpeech
class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
api_key: str
model: str
voice: str
speech_rate: float
# Initialise the Aliyun BaiLian text-to-speech wrapper from keyword arguments.
# Keys read from kwargs: 'api_key', 'model', 'voice', 'speech_rate'.
def __init__(self, **kwargs):
# All kwargs are forwarded unchanged to the parent initialiser first.
super().__init__(**kwargs)
self.api_key = kwargs.get('api_key')
self.model = kwargs.get('model')
# Defaults below apply only when the key is absent from kwargs.
self.voice = kwargs.get('voice', 'longxiaochun')
self.speech_rate = kwargs.get('speech_rate', 1.0)
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
optional_params['max_tokens'] = model_kwargs['max_tokens']
if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
optional_params['temperature'] = model_kwargs['temperature']
if 'voice' in model_kwargs and model_kwargs['voice'] is not None:
optional_params['voice'] = model_kwargs['voice']
if 'speech_rate' in model_kwargs and model_kwargs['speech_rate'] is not None:
optional_params['speech_rate'] = model_kwargs['speech_rate']
return AliyunBaiLianTextToSpeech(
model=model_name,
api_key=model_credential.get('api_key'),
@ -33,9 +37,8 @@ class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
self.text_to_speech('你好')
def text_to_speech(self, text):
voice = "longxiaochun"
dashscope.api_key = self.api_key
synthesizer = SpeechSynthesizer(model=self.model, voice=voice)
synthesizer = SpeechSynthesizer(model=self.model, voice=self.voice, speech_rate=self.speech_rate)
audio = synthesizer.call(text)
if type(audio) == str:
print(audio)

View File

@ -11,7 +11,7 @@ class OpenAITTSModelGeneralParams(BaseForm):
voice = forms.SingleSelect(
TooltipLabel('Voice', '尝试不同的声音(合金、回声、寓言、缟玛瑙、新星和闪光),找到一种适合您所需的音调和听众的声音。当前的语音针对英语进行了优化。'),
required=True, default_value='alloy',
text_field='text',
text_field='value',
value_field='value',
option_list=[
{'text': 'alloy', 'value': 'alloy'},

View File

@ -16,20 +16,20 @@ class OpenAITextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
api_base: str
api_key: str
model: str
voice: str
# Initialise the OpenAI text-to-speech wrapper from keyword arguments.
# Keys read from kwargs: 'api_key', 'api_base', 'model', 'voice'.
def __init__(self, **kwargs):
# All kwargs are forwarded unchanged to the parent initialiser first.
super().__init__(**kwargs)
self.api_key = kwargs.get('api_key')
self.api_base = kwargs.get('api_base')
self.model = kwargs.get('model')
# 'alloy' is used only when no 'voice' key is present in kwargs.
self.voice = kwargs.get('voice', 'alloy')
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
optional_params['max_tokens'] = model_kwargs['max_tokens']
if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
optional_params['temperature'] = model_kwargs['temperature']
if 'voice' in model_kwargs and model_kwargs['voice'] is not None:
optional_params['voice'] = model_kwargs['voice']
return OpenAITextToSpeech(
model=model_name,
api_base=model_credential.get('api_base'),
@ -52,7 +52,7 @@ class OpenAITextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
)
with client.audio.speech.with_streaming_response.create(
model=self.model,
voice="alloy",
voice=self.voice,
input=text,
) as response:
return response.read()

View File

@ -12,7 +12,7 @@ class VolcanicEngineTTSModelGeneralParams(BaseForm):
voice_type = forms.SingleSelect(
TooltipLabel('音色', '中文音色可支持中英文混合场景'),
required=True, default_value='BV002_streaming',
text_field='text',
text_field='value',
value_field='value',
option_list=[
{'text': '灿灿 2.0', 'value': 'BV700_V2_streaming'},

View File

@ -45,6 +45,8 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
volcanic_cluster: str
volcanic_api_url: str
volcanic_token: str
speed_ratio: float
voice_type: str
def __init__(self, **kwargs):
super().__init__(**kwargs)
@ -52,14 +54,16 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
self.volcanic_token = kwargs.get('volcanic_token')
self.volcanic_app_id = kwargs.get('volcanic_app_id')
self.volcanic_cluster = kwargs.get('volcanic_cluster')
self.voice_type = kwargs.get('voice_type', 'BV002_streaming')
self.speed_ratio = kwargs.get('speed_ratio', 1.0)
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
optional_params['max_tokens'] = model_kwargs['max_tokens']
if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
optional_params['temperature'] = model_kwargs['temperature']
if 'voice_type' in model_kwargs and model_kwargs['voice_type'] is not None:
optional_params['voice_type'] = model_kwargs['voice_type']
if 'speed_ratio' in model_kwargs and model_kwargs['speed_ratio'] is not None:
optional_params['speed_ratio'] = model_kwargs['speed_ratio']
return VolcanicEngineTextToSpeech(
volcanic_api_url=model_credential.get('volcanic_api_url'),
volcanic_token=model_credential.get('volcanic_token'),
@ -82,9 +86,9 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
"uid": "uid"
},
"audio": {
"voice_type": "BV002_streaming",
"voice_type": self.voice_type,
"encoding": "mp3",
"speed_ratio": 1.0,
"speed_ratio": self.speed_ratio,
"volume_ratio": 1.0,
"pitch_ratio": 1.0,
},

View File

@ -12,7 +12,7 @@ class XunFeiTTSModelGeneralParams(BaseForm):
vcn = forms.SingleSelect(
TooltipLabel('发音人', '发音人,可选值:请到控制台添加试用或购买发音人,添加后即显示发音人参数值'),
required=True, default_value='xiaoyan',
text_field='text',
text_field='value',
value_field='value',
option_list=[
{'text': '讯飞小燕', 'value': 'xiaoyan'},

View File

@ -37,6 +37,8 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
spark_api_key: str
spark_api_secret: str
spark_api_url: str
speed: int
vcn: str
def __init__(self, **kwargs):
super().__init__(**kwargs)
@ -44,14 +46,16 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
self.spark_app_id = kwargs.get('spark_app_id')
self.spark_api_key = kwargs.get('spark_api_key')
self.spark_api_secret = kwargs.get('spark_api_secret')
self.vcn = kwargs.get('vcn', 'xiaoyan')
self.speed = kwargs.get('speed', 50)
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
optional_params['max_tokens'] = model_kwargs['max_tokens']
if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
optional_params['temperature'] = model_kwargs['temperature']
if 'vcn' in model_kwargs and model_kwargs['vcn'] is not None:
optional_params['vcn'] = model_kwargs['vcn']
if 'speed' in model_kwargs and model_kwargs['speed'] is not None:
optional_params['speed'] = model_kwargs['speed']
return XFSparkTextToSpeech(
spark_app_id=model_credential.get('spark_app_id'),
spark_api_key=model_credential.get('spark_api_key'),
@ -134,7 +138,7 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
async def send(self, ws, text):
d = {
"common": {"app_id": self.spark_app_id},
"business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"},
"business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": self.vcn, "speed": self.speed, "tte": "utf8"},
"data": {"status": 2, "text": str(base64.b64encode(text.encode('utf-8')), "UTF8")},
}
d = json.dumps(d)

View File

@ -12,7 +12,7 @@ class XInferenceTTSModelGeneralParams(BaseForm):
voice = forms.SingleSelect(
TooltipLabel('音色', ''),
required=True, default_value='中文女',
text_field='text',
text_field='value',
value_field='value',
option_list=[
{'text': '中文女', 'value': '中文女'},

View File

@ -16,20 +16,20 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
api_base: str
api_key: str
model: str
voice: str
# Initialise the XInference text-to-speech wrapper from keyword arguments.
# Keys read from kwargs: 'api_key', 'api_base', 'model', 'voice'.
def __init__(self, **kwargs):
# All kwargs are forwarded unchanged to the parent initialiser first.
super().__init__(**kwargs)
self.api_key = kwargs.get('api_key')
self.api_base = kwargs.get('api_base')
self.model = kwargs.get('model')
# '中文女' is used only when no 'voice' key is present in kwargs.
self.voice = kwargs.get('voice', '中文女')
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
optional_params['max_tokens'] = model_kwargs['max_tokens']
if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
optional_params['temperature'] = model_kwargs['temperature']
if 'voice' in model_kwargs and model_kwargs['voice'] is not None:
optional_params['voice'] = model_kwargs['voice']
return XInferenceTextToSpeech(
model=model_name,
api_base=model_credential.get('api_base'),
@ -54,7 +54,7 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
with client.audio.speech.with_streaming_response.create(
model=self.model,
voice="中文女",
voice=self.voice,
input=text,
) as response:
return response.read()

View File

@ -15,6 +15,7 @@ interface ApplicationFormType {
type?: string
work_flow?: any
model_params_setting?: any
tts_model_params_setting?: any
stt_model_id?: string
tts_model_id?: string
stt_model_enable?: boolean

View File

@ -395,7 +395,18 @@
<template #label>
<div class="flex-between">
<span class="mr-4">语音播放</span>
<el-switch size="small" v-model="applicationForm.tts_model_enable" />
<div>
  <!-- Opens the TTS parameter dialog. The disabled state reads applicationForm,
       the form object this view binds everywhere else. -->
  <el-button
    type="primary"
    link
    @click="openTTSParamSettingDialog"
    :disabled="!applicationForm.tts_model_id && applicationForm.tts_type === 'BROWSER'"
  >
    <el-icon class="mr-4"><Setting /></el-icon>
    设置
  </el-button>
  <el-switch size="small" v-model="applicationForm.tts_model_enable" />
</div>
</div>
</template>
<el-radio-group
@ -521,6 +532,7 @@
</el-row>
<AIModeParamSettingDialog ref="AIModeParamSettingDialogRef" @refresh="refreshForm" />
<AIModeParamSettingDialog ref="TTSModeParamSettingDialogRef" @refresh="refreshTTSForm" />
<ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" />
<AddDatasetDialog
ref="AddDatasetDialogRef"
@ -574,6 +586,7 @@ const defaultPrompt = t('views.application.prompt.defaultPrompt', {
})
const AIModeParamSettingDialogRef = ref<InstanceType<typeof AIModeParamSettingDialog>>()
const TTSModeParamSettingDialogRef = ref<InstanceType<typeof AIModeParamSettingDialog>>()
const ParamSettingDialogRef = ref<InstanceType<typeof ParamSettingDialog>>()
const createModelRef = ref<InstanceType<typeof CreateModelDialog>>()
const selectProviderRef = ref<InstanceType<typeof SelectProviderDialog>>()
@ -685,6 +698,15 @@ const openAIParamSettingDialog = () => {
AIModeParamSettingDialogRef.value?.open(model_id, id, applicationForm.value.model_params_setting)
}
/**
 * Open the parameter dialog for the text-to-speech model selected on the
 * application form. When no model is selected, show a prompt instead.
 */
const openTTSParamSettingDialog = () => {
  const ttsModelId = applicationForm.value.tts_model_id
  if (ttsModelId) {
    // Pass the currently stored settings to the dialog's open() call.
    TTSModeParamSettingDialogRef.value?.open(
      ttsModelId,
      id,
      applicationForm.value.tts_model_params_setting
    )
  } else {
    MsgSuccess(t('请选择语音播放模型'))
  }
}
const openParamSettingDialog = () => {
ParamSettingDialogRef.value?.open(applicationForm.value)
}
@ -697,6 +719,10 @@ function refreshForm(data: any) {
applicationForm.value.model_params_setting = data
}
/** Store the TTS parameter values emitted by the dialog on the application form. */
function refreshTTSForm(data: any): void {
  const currentForm = applicationForm.value
  currentForm.tts_model_params_setting = data
}
const openCreateModel = (provider?: Provider) => {
if (provider && provider.provider) {
createModelRef.value?.open(provider)

View File

@ -176,7 +176,18 @@
<template #label>
<div class="flex-between">
<span class="mr-4">语音播放</span>
<el-switch size="small" v-model="form_data.tts_model_enable" />
<div>
<el-button
type="primary"
link
@click="openTTSParamSettingDialog"
:disabled="!form_data.tts_model_id && form_data.tts_type === 'BROWSER'"
>
<el-icon class="mr-4"><Setting /></el-icon>
设置
</el-button>
<el-switch size="small" v-model="form_data.tts_model_enable" />
</div>
</div>
</template>
<el-radio-group v-model="form_data.tts_type" v-show="form_data.tts_model_enable">
@ -246,6 +257,7 @@
<FieldFormDialog ref="FieldFormDialogRef" @refresh="refreshFieldList" />
</NodeContainer>
<AIModeParamSettingDialog ref="TTSModeParamSettingDialogRef" @refresh="refreshTTSForm" />
</template>
<script setup lang="ts">
import { app } from '@/main'
@ -258,8 +270,9 @@ import useStore from '@/stores'
import applicationApi from '@/api/application'
import type { Provider } from '@/api/type/model'
import FieldFormDialog from './component/FieldFormDialog.vue'
import { MsgError, MsgWarning } from '@/utils/message'
import { MsgError, MsgSuccess, MsgWarning } from '@/utils/message'
import { t } from '@/locales'
import AIModeParamSettingDialog from '@/views/application/component/AIModeParamSettingDialog.vue'
const { model } = useStore()
const {
@ -271,6 +284,7 @@ const props = defineProps<{ nodeModel: any }>()
const sttModelOptions = ref<any>(null)
const ttsModelOptions = ref<any>(null)
const providerOptions = ref<Array<Provider>>([])
const TTSModeParamSettingDialogRef = ref<InstanceType<typeof AIModeParamSettingDialog>>()
const form = {
name: '',
@ -375,6 +389,20 @@ function refreshFieldList(data: any) {
props.nodeModel.graphModel.eventCenter.emit('refreshFieldList', inputFieldList.value)
}
/**
 * Open the parameter dialog for the text-to-speech model selected on this
 * node's form. When no model is selected, warn the user and do nothing else.
 */
const openTTSParamSettingDialog = () => {
  const model_id = form_data.value.tts_model_id
  if (!model_id) {
    // A missing selection is a validation problem, so use the warning
    // message helper rather than the success one.
    MsgWarning(t('请选择语音播放模型'))
    return
  }
  TTSModeParamSettingDialogRef.value?.open(model_id, id, form_data.value.tts_model_params_setting)
}
/** Store the TTS parameter values emitted by the dialog on this node's form data. */
const refreshTTSForm = (data: any) => {
  const nodeForm = form_data.value
  nodeForm.tts_model_params_setting = data
}
onMounted(() => {
set(props.nodeModel, 'validate', validate)
if (props.nodeModel.properties.input_field_list) {