diff --git a/apps/application/migrations/0017_application_tts_model_params_setting.py b/apps/application/migrations/0017_application_tts_model_params_setting.py
new file mode 100644
index 000000000..43428841f
--- /dev/null
+++ b/apps/application/migrations/0017_application_tts_model_params_setting.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.15 on 2024-10-16 13:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('application', '0016_alter_chatrecord_problem_text'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='application',
+ name='tts_model_params_setting',
+ field=models.JSONField(default={}, verbose_name='模型参数相关设置'),
+ ),
+ ]
diff --git a/apps/application/models/application.py b/apps/application/models/application.py
index de1868dde..ba4c03e26 100644
--- a/apps/application/models/application.py
+++ b/apps/application/models/application.py
@@ -49,6 +49,7 @@ class Application(AppModelMixin):
dataset_setting = models.JSONField(verbose_name="数据集参数设置", default=get_dataset_setting_dict)
model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default={})
+ tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default={})
problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
icon = models.CharField(max_length=256, verbose_name="应用icon", default="/ui/favicon.ico")
work_flow = models.JSONField(verbose_name="工作流数据", default=dict)
diff --git a/apps/application/serializers/application_serializers.py b/apps/application/serializers/application_serializers.py
index c06277d7f..bdde5859b 100644
--- a/apps/application/serializers/application_serializers.py
+++ b/apps/application/serializers/application_serializers.py
@@ -810,7 +810,7 @@ class ApplicationSerializer(serializers.Serializer):
update_keys = ['name', 'desc', 'model_id', 'multiple_rounds_dialogue', 'prologue', 'status',
'dataset_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type',
- 'api_key_is_active', 'icon', 'work_flow', 'model_params_setting',
+ 'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting',
'problem_optimization_prompt']
for update_key in update_keys:
if update_key in instance and instance.get(update_key) is not None:
@@ -932,6 +932,8 @@ class ApplicationSerializer(serializers.Serializer):
instance['tts_model_enable'] = node_data['tts_model_enable']
if 'tts_type' in node_data:
instance['tts_type'] = node_data['tts_type']
+ if 'tts_model_params_setting' in node_data:
+ instance['tts_model_params_setting'] = node_data['tts_model_params_setting']
break
def speech_to_text(self, file, with_valid=True):
@@ -950,7 +952,7 @@ class ApplicationSerializer(serializers.Serializer):
application_id = self.data.get('application_id')
application = QuerySet(Application).filter(id=application_id).first()
if application.tts_model_enable:
- model = get_model_instance_by_model_user_id(application.tts_model_id, application.user_id)
+ model = get_model_instance_by_model_user_id(application.tts_model_id, application.user_id, **application.tts_model_params_setting)
return model.text_to_speech(text)
class ApplicationKeySerializerModel(serializers.ModelSerializer):
diff --git a/apps/setting/migrations/0008_modelparam.py b/apps/setting/migrations/0008_modelparam.py
new file mode 100644
index 000000000..8be3892b3
--- /dev/null
+++ b/apps/setting/migrations/0008_modelparam.py
@@ -0,0 +1,25 @@
+# Generated by Django 4.2.15 on 2024-10-16 13:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # Adds the ModelParam model to the 'setting' app.
+
+ dependencies = [
+ ('setting', '0007_model_model_params_form'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='ModelParam',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('label', models.CharField(max_length=128, verbose_name='参数')),  # verbose_name reads "parameter"
+ ('field', models.CharField(max_length=256, verbose_name='显示名称')),  # verbose_name reads "display name"; NOTE(review): the label/field captions look swapped - confirm against the model
+ ('default_value', models.CharField(max_length=1000, verbose_name='默认值')),  # verbose_name reads "default value"
+ ('input_type', models.CharField(max_length=32, verbose_name='组件类型')),  # verbose_name reads "component type"
+ ('attrs', models.JSONField(verbose_name='属性')),  # verbose_name reads "attributes"; no default is declared
+ ('required', models.BooleanField(verbose_name='必填')),  # verbose_name reads "required"
+ ],
+ ),
+ ]
diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py
index c00ffddd0..640ba7a01 100644
--- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py
@@ -12,7 +12,7 @@ class AliyunBaiLianTTSModelGeneralParams(BaseForm):
voice = forms.SingleSelect(
TooltipLabel('音色', '中文音色可支持中英文混合场景'),
required=True, default_value='longxiaochun',
- text_field='text',
+ text_field='value',
value_field='value',
option_list=[
{'text': '龙小淳', 'value': 'longxiaochun'},
diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py
index 1e472a742..cdcdd426c 100644
--- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py
@@ -10,19 +10,23 @@ from setting.models_provider.impl.base_tts import BaseTextToSpeech
class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
api_key: str
model: str
+ voice: str
+ speech_rate: float
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.api_key = kwargs.get('api_key')
self.model = kwargs.get('model')
+ self.voice = kwargs.get('voice', 'longxiaochun')  # voice id used when the 'voice' kwarg is absent
+ self.speech_rate = kwargs.get('speech_rate', 1.0)  # rate used when the 'speech_rate' kwarg is absent
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
- if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
- optional_params['max_tokens'] = model_kwargs['max_tokens']
- if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
- optional_params['temperature'] = model_kwargs['temperature']
+ if 'voice' in model_kwargs and model_kwargs['voice'] is not None:
+ optional_params['voice'] = model_kwargs['voice']
+ if 'speech_rate' in model_kwargs and model_kwargs['speech_rate'] is not None:
+ optional_params['speech_rate'] = model_kwargs['speech_rate']
return AliyunBaiLianTextToSpeech(
model=model_name,
api_key=model_credential.get('api_key'),
@@ -33,9 +37,8 @@ class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
self.text_to_speech('你好')
def text_to_speech(self, text):
- voice = "longxiaochun"
dashscope.api_key = self.api_key
- synthesizer = SpeechSynthesizer(model=self.model, voice=voice)
+ synthesizer = SpeechSynthesizer(model=self.model, voice=self.voice, speech_rate=self.speech_rate)
audio = synthesizer.call(text)
if type(audio) == str:
print(audio)
diff --git a/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py b/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py
index 38d839ca0..96d00131a 100644
--- a/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py
+++ b/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py
@@ -11,7 +11,7 @@ class OpenAITTSModelGeneralParams(BaseForm):
voice = forms.SingleSelect(
TooltipLabel('Voice', '尝试不同的声音(合金、回声、寓言、缟玛瑙、新星和闪光),找到一种适合您所需的音调和听众的声音。当前的语音针对英语进行了优化。'),
required=True, default_value='alloy',
- text_field='text',
+ text_field='value',
value_field='value',
option_list=[
{'text': 'alloy', 'value': 'alloy'},
diff --git a/apps/setting/models_provider/impl/openai_model_provider/model/tts.py b/apps/setting/models_provider/impl/openai_model_provider/model/tts.py
index c09754840..2fa63d51d 100644
--- a/apps/setting/models_provider/impl/openai_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/openai_model_provider/model/tts.py
@@ -16,20 +16,20 @@ class OpenAITextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
api_base: str
api_key: str
model: str
+ voice: str
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.api_key = kwargs.get('api_key')
self.api_base = kwargs.get('api_base')
self.model = kwargs.get('model')
+ self.voice = kwargs.get('voice', 'alloy')  # voice name used when the 'voice' kwarg is absent
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
- if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
- optional_params['max_tokens'] = model_kwargs['max_tokens']
- if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
- optional_params['temperature'] = model_kwargs['temperature']
+ if 'voice' in model_kwargs and model_kwargs['voice'] is not None:
+ optional_params['voice'] = model_kwargs['voice']
return OpenAITextToSpeech(
model=model_name,
api_base=model_credential.get('api_base'),
@@ -52,7 +52,7 @@ class OpenAITextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
)
with client.audio.speech.with_streaming_response.create(
model=self.model,
- voice="alloy",
+ voice=self.voice,
input=text,
) as response:
return response.read()
diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py
index 7565a2546..b565b162b 100644
--- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py
+++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py
@@ -12,7 +12,7 @@ class VolcanicEngineTTSModelGeneralParams(BaseForm):
voice_type = forms.SingleSelect(
TooltipLabel('音色', '中文音色可支持中英文混合场景'),
required=True, default_value='BV002_streaming',
- text_field='text',
+ text_field='value',
value_field='value',
option_list=[
{'text': '灿灿 2.0', 'value': 'BV700_V2_streaming'},
diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py
index 33ee17d64..7a5af8e4b 100644
--- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py
@@ -45,6 +45,8 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
volcanic_cluster: str
volcanic_api_url: str
volcanic_token: str
+ speed_ratio: float
+ voice_type: str
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -52,14 +54,16 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
self.volcanic_token = kwargs.get('volcanic_token')
self.volcanic_app_id = kwargs.get('volcanic_app_id')
self.volcanic_cluster = kwargs.get('volcanic_cluster')
+ self.voice_type = kwargs.get('voice_type', 'BV002_streaming')
+ self.speed_ratio = kwargs.get('speed_ratio', 1.0)
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
- if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
- optional_params['max_tokens'] = model_kwargs['max_tokens']
- if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
- optional_params['temperature'] = model_kwargs['temperature']
+ if 'voice_type' in model_kwargs and model_kwargs['voice_type'] is not None:
+ optional_params['voice_type'] = model_kwargs['voice_type']
+ if 'speed_ratio' in model_kwargs and model_kwargs['speed_ratio'] is not None:
+ optional_params['speed_ratio'] = model_kwargs['speed_ratio']
return VolcanicEngineTextToSpeech(
volcanic_api_url=model_credential.get('volcanic_api_url'),
volcanic_token=model_credential.get('volcanic_token'),
@@ -82,9 +86,9 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
"uid": "uid"
},
"audio": {
- "voice_type": "BV002_streaming",
+ "voice_type": self.voice_type,
"encoding": "mp3",
- "speed_ratio": 1.0,
+ "speed_ratio": self.speed_ratio,
"volume_ratio": 1.0,
"pitch_ratio": 1.0,
},
diff --git a/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py b/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py
index f0e68b38b..ec9478aae 100644
--- a/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py
+++ b/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py
@@ -12,7 +12,7 @@ class XunFeiTTSModelGeneralParams(BaseForm):
vcn = forms.SingleSelect(
TooltipLabel('发音人', '发音人,可选值:请到控制台添加试用或购买发音人,添加后即显示发音人参数值'),
required=True, default_value='xiaoyan',
- text_field='text',
+ text_field='value',
value_field='value',
option_list=[
{'text': '讯飞小燕', 'value': 'xiaoyan'},
diff --git a/apps/setting/models_provider/impl/xf_model_provider/model/tts.py b/apps/setting/models_provider/impl/xf_model_provider/model/tts.py
index 004b78858..d6635c5dc 100644
--- a/apps/setting/models_provider/impl/xf_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/xf_model_provider/model/tts.py
@@ -37,6 +37,8 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
spark_api_key: str
spark_api_secret: str
spark_api_url: str
+ speed: int
+ vcn: str
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -44,14 +46,16 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
self.spark_app_id = kwargs.get('spark_app_id')
self.spark_api_key = kwargs.get('spark_api_key')
self.spark_api_secret = kwargs.get('spark_api_secret')
+ self.vcn = kwargs.get('vcn', 'xiaoyan')
+ self.speed = kwargs.get('speed', 50)
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
- if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
- optional_params['max_tokens'] = model_kwargs['max_tokens']
- if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
- optional_params['temperature'] = model_kwargs['temperature']
+ if 'vcn' in model_kwargs and model_kwargs['vcn'] is not None:
+ optional_params['vcn'] = model_kwargs['vcn']
+ if 'speed' in model_kwargs and model_kwargs['speed'] is not None:
+ optional_params['speed'] = model_kwargs['speed']
return XFSparkTextToSpeech(
spark_app_id=model_credential.get('spark_app_id'),
spark_api_key=model_credential.get('spark_api_key'),
@@ -134,7 +138,7 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
async def send(self, ws, text):
d = {
"common": {"app_id": self.spark_app_id},
- "business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"},
+ "business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": self.vcn, "speed": self.speed, "tte": "utf8"},
"data": {"status": 2, "text": str(base64.b64encode(text.encode('utf-8')), "UTF8")},
}
d = json.dumps(d)
diff --git a/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py b/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py
index d2844739f..0bf3daadd 100644
--- a/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py
+++ b/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py
@@ -12,7 +12,7 @@ class XInferenceTTSModelGeneralParams(BaseForm):
voice = forms.SingleSelect(
TooltipLabel('音色', ''),
required=True, default_value='中文女',
- text_field='text',
+ text_field='value',
value_field='value',
option_list=[
{'text': '中文女', 'value': '中文女'},
diff --git a/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py b/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py
index 6e6e46aa5..bb8a3faed 100644
--- a/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py
@@ -16,20 +16,20 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
api_base: str
api_key: str
model: str
+ voice: str
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.api_key = kwargs.get('api_key')
self.api_base = kwargs.get('api_base')
self.model = kwargs.get('model')
+ self.voice = kwargs.get('voice', '中文女')  # voice name used when the 'voice' kwarg is absent
@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
optional_params = {}
- if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
- optional_params['max_tokens'] = model_kwargs['max_tokens']
- if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
- optional_params['temperature'] = model_kwargs['temperature']
+ if 'voice' in model_kwargs and model_kwargs['voice'] is not None:
+ optional_params['voice'] = model_kwargs['voice']
return XInferenceTextToSpeech(
model=model_name,
api_base=model_credential.get('api_base'),
@@ -54,7 +54,7 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
with client.audio.speech.with_streaming_response.create(
model=self.model,
- voice="中文女",
+ voice=self.voice,
input=text,
) as response:
return response.read()
diff --git a/ui/src/api/type/application.ts b/ui/src/api/type/application.ts
index 97be3b257..00ad179fe 100644
--- a/ui/src/api/type/application.ts
+++ b/ui/src/api/type/application.ts
@@ -15,6 +15,7 @@ interface ApplicationFormType {
type?: string
work_flow?: any
model_params_setting?: any
+ tts_model_params_setting?: any
stt_model_id?: string
tts_model_id?: string
stt_model_enable?: boolean
diff --git a/ui/src/views/application/ApplicationSetting.vue b/ui/src/views/application/ApplicationSetting.vue
index d5c1e714d..e65355a0a 100644
--- a/ui/src/views/application/ApplicationSetting.vue
+++ b/ui/src/views/application/ApplicationSetting.vue
@@ -395,7 +395,18 @@
+
>()
+const TTSModeParamSettingDialogRef = ref>()
const ParamSettingDialogRef = ref>()
const createModelRef = ref>()
const selectProviderRef = ref>()
@@ -685,6 +698,15 @@ const openAIParamSettingDialog = () => {
AIModeParamSettingDialogRef.value?.open(model_id, id, applicationForm.value.model_params_setting)
}
+const openTTSParamSettingDialog = () => { // Opens the TTS parameter dialog for the currently selected TTS model.
+ const model_id = applicationForm.value.tts_model_id
+ if (!model_id) {
+ MsgSuccess(t('请选择语音播放模型')) // text reads "please select a voice playback model"; NOTE(review): shown through MsgSuccess - confirm a warning style is not intended
+ return
+ }
+ TTSModeParamSettingDialogRef.value?.open(model_id, id, applicationForm.value.tts_model_params_setting) // hands the form's current TTS params to the dialog; no-op if the ref is unset
+}
+
const openParamSettingDialog = () => {
ParamSettingDialogRef.value?.open(applicationForm.value)
}
@@ -697,6 +719,10 @@ function refreshForm(data: any) {
applicationForm.value.model_params_setting = data
}
+function refreshTTSForm(data: any) { // Replaces the form's tts_model_params_setting with `data`.
+ applicationForm.value.tts_model_params_setting = data
+}
+
const openCreateModel = (provider?: Provider) => {
if (provider && provider.provider) {
createModelRef.value?.open(provider)
diff --git a/ui/src/workflow/nodes/base-node/index.vue b/ui/src/workflow/nodes/base-node/index.vue
index 144a2ed2d..741093d54 100644
--- a/ui/src/workflow/nodes/base-node/index.vue
+++ b/ui/src/workflow/nodes/base-node/index.vue
@@ -176,7 +176,18 @@
@@ -246,6 +257,7 @@
+