diff --git a/apps/application/migrations/0017_application_tts_model_params_setting.py b/apps/application/migrations/0017_application_tts_model_params_setting.py new file mode 100644 index 000000000..43428841f --- /dev/null +++ b/apps/application/migrations/0017_application_tts_model_params_setting.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.15 on 2024-10-16 13:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('application', '0016_alter_chatrecord_problem_text'), + ] + + operations = [ + migrations.AddField( + model_name='application', + name='tts_model_params_setting', + field=models.JSONField(default=dict, verbose_name='模型参数相关设置'), + ), + ] diff --git a/apps/application/models/application.py b/apps/application/models/application.py index de1868dde..ba4c03e26 100644 --- a/apps/application/models/application.py +++ b/apps/application/models/application.py @@ -49,6 +49,7 @@ class Application(AppModelMixin): dataset_setting = models.JSONField(verbose_name="数据集参数设置", default=get_dataset_setting_dict) model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict) model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default={}) + tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict) problem_optimization = models.BooleanField(verbose_name="问题优化", default=False) icon = models.CharField(max_length=256, verbose_name="应用icon", default="/ui/favicon.ico") work_flow = models.JSONField(verbose_name="工作流数据", default=dict) diff --git a/apps/application/serializers/application_serializers.py b/apps/application/serializers/application_serializers.py index c06277d7f..bdde5859b 100644 --- a/apps/application/serializers/application_serializers.py +++ b/apps/application/serializers/application_serializers.py @@ -810,7 +810,7 @@ class ApplicationSerializer(serializers.Serializer): update_keys = ['name', 'desc', 'model_id', 'multiple_rounds_dialogue',
'prologue', 'status', 'dataset_setting', 'model_setting', 'problem_optimization', 'dialogue_number', 'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type', - 'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', + 'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting', 'problem_optimization_prompt'] for update_key in update_keys: if update_key in instance and instance.get(update_key) is not None: @@ -932,6 +932,8 @@ class ApplicationSerializer(serializers.Serializer): instance['tts_model_enable'] = node_data['tts_model_enable'] if 'tts_type' in node_data: instance['tts_type'] = node_data['tts_type'] + if 'tts_model_params_setting' in node_data: + instance['tts_model_params_setting'] = node_data['tts_model_params_setting'] break def speech_to_text(self, file, with_valid=True): @@ -950,7 +952,7 @@ class ApplicationSerializer(serializers.Serializer): application_id = self.data.get('application_id') application = QuerySet(Application).filter(id=application_id).first() if application.tts_model_enable: - model = get_model_instance_by_model_user_id(application.tts_model_id, application.user_id) + model = get_model_instance_by_model_user_id(application.tts_model_id, application.user_id, **(application.tts_model_params_setting or {})) return model.text_to_speech(text) class ApplicationKeySerializerModel(serializers.ModelSerializer): diff --git a/apps/setting/migrations/0008_modelparam.py b/apps/setting/migrations/0008_modelparam.py new file mode 100644 index 000000000..8be3892b3 --- /dev/null +++ b/apps/setting/migrations/0008_modelparam.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.15 on 2024-10-16 13:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('setting', '0007_model_model_params_form'), + ] + + operations = [ + migrations.CreateModel( + name='ModelParam', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True,
serialize=False, verbose_name='ID')), + ('label', models.CharField(max_length=128, verbose_name='参数')), + ('field', models.CharField(max_length=256, verbose_name='显示名称')), + ('default_value', models.CharField(max_length=1000, verbose_name='默认值')), + ('input_type', models.CharField(max_length=32, verbose_name='组件类型')), + ('attrs', models.JSONField(verbose_name='属性')), + ('required', models.BooleanField(verbose_name='必填')), + ], + ), + ] diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py index c00ffddd0..640ba7a01 100644 --- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py +++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py @@ -12,7 +12,7 @@ class AliyunBaiLianTTSModelGeneralParams(BaseForm): voice = forms.SingleSelect( TooltipLabel('音色', '中文音色可支持中英文混合场景'), required=True, default_value='longxiaochun', - text_field='text', + text_field='value', value_field='value', option_list=[ {'text': '龙小淳', 'value': 'longxiaochun'}, diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py index 1e472a742..cdcdd426c 100644 --- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py +++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py @@ -10,19 +10,23 @@ from setting.models_provider.impl.base_tts import BaseTextToSpeech class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): api_key: str model: str + voice: str + speech_rate: float def __init__(self, **kwargs): super().__init__(**kwargs) self.api_key = kwargs.get('api_key') self.model = kwargs.get('model') + self.voice = kwargs.get('voice', 'longxiaochun') + self.speech_rate = kwargs.get('speech_rate', 1.0) @staticmethod def new_instance(model_type, 
model_name, model_credential: Dict[str, object], **model_kwargs): optional_params = {} - if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None: - optional_params['max_tokens'] = model_kwargs['max_tokens'] - if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None: - optional_params['temperature'] = model_kwargs['temperature'] + if 'voice' in model_kwargs and model_kwargs['voice'] is not None: + optional_params['voice'] = model_kwargs['voice'] + if 'speech_rate' in model_kwargs and model_kwargs['speech_rate'] is not None: + optional_params['speech_rate'] = model_kwargs['speech_rate'] return AliyunBaiLianTextToSpeech( model=model_name, api_key=model_credential.get('api_key'), @@ -33,9 +37,8 @@ class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): self.text_to_speech('你好') def text_to_speech(self, text): - voice = "longxiaochun" dashscope.api_key = self.api_key - synthesizer = SpeechSynthesizer(model=self.model, voice=voice) + synthesizer = SpeechSynthesizer(model=self.model, voice=self.voice, speech_rate=self.speech_rate) audio = synthesizer.call(text) if type(audio) == str: print(audio) diff --git a/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py b/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py index 38d839ca0..96d00131a 100644 --- a/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py +++ b/apps/setting/models_provider/impl/openai_model_provider/credential/tts.py @@ -11,7 +11,7 @@ class OpenAITTSModelGeneralParams(BaseForm): voice = forms.SingleSelect( TooltipLabel('Voice', '尝试不同的声音(合金、回声、寓言、缟玛瑙、新星和闪光),找到一种适合您所需的音调和听众的声音。当前的语音针对英语进行了优化。'), required=True, default_value='alloy', - text_field='text', + text_field='value', value_field='value', option_list=[ {'text': 'alloy', 'value': 'alloy'}, diff --git a/apps/setting/models_provider/impl/openai_model_provider/model/tts.py b/apps/setting/models_provider/impl/openai_model_provider/model/tts.py 
index c09754840..2fa63d51d 100644 --- a/apps/setting/models_provider/impl/openai_model_provider/model/tts.py +++ b/apps/setting/models_provider/impl/openai_model_provider/model/tts.py @@ -16,20 +16,20 @@ class OpenAITextToSpeech(MaxKBBaseModel, BaseTextToSpeech): api_base: str api_key: str model: str + voice: str def __init__(self, **kwargs): super().__init__(**kwargs) self.api_key = kwargs.get('api_key') self.api_base = kwargs.get('api_base') self.model = kwargs.get('model') + self.voice = kwargs.get('voice', 'alloy') @staticmethod def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): optional_params = {} - if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None: - optional_params['max_tokens'] = model_kwargs['max_tokens'] - if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None: - optional_params['temperature'] = model_kwargs['temperature'] + if 'voice' in model_kwargs and model_kwargs['voice'] is not None: + optional_params['voice'] = model_kwargs['voice'] return OpenAITextToSpeech( model=model_name, api_base=model_credential.get('api_base'), @@ -52,7 +52,7 @@ class OpenAITextToSpeech(MaxKBBaseModel, BaseTextToSpeech): ) with client.audio.speech.with_streaming_response.create( model=self.model, - voice="alloy", + voice=self.voice, input=text, ) as response: return response.read() diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py index 7565a2546..b565b162b 100644 --- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py +++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/credential/tts.py @@ -12,7 +12,7 @@ class VolcanicEngineTTSModelGeneralParams(BaseForm): voice_type = forms.SingleSelect( TooltipLabel('音色', '中文音色可支持中英文混合场景'), required=True, default_value='BV002_streaming', - text_field='text', + 
text_field='value', value_field='value', option_list=[ {'text': '灿灿 2.0', 'value': 'BV700_V2_streaming'}, diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py index 33ee17d64..7a5af8e4b 100644 --- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py +++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py @@ -45,6 +45,8 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): volcanic_cluster: str volcanic_api_url: str volcanic_token: str + speed_ratio: float + voice_type: str def __init__(self, **kwargs): super().__init__(**kwargs) @@ -52,14 +54,16 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): self.volcanic_token = kwargs.get('volcanic_token') self.volcanic_app_id = kwargs.get('volcanic_app_id') self.volcanic_cluster = kwargs.get('volcanic_cluster') + self.voice_type = kwargs.get('voice_type', 'BV002_streaming') + self.speed_ratio = kwargs.get('speed_ratio', 1.0) @staticmethod def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): optional_params = {} - if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None: - optional_params['max_tokens'] = model_kwargs['max_tokens'] - if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None: - optional_params['temperature'] = model_kwargs['temperature'] + if 'voice_type' in model_kwargs and model_kwargs['voice_type'] is not None: + optional_params['voice_type'] = model_kwargs['voice_type'] + if 'speed_ratio' in model_kwargs and model_kwargs['speed_ratio'] is not None: + optional_params['speed_ratio'] = model_kwargs['speed_ratio'] return VolcanicEngineTextToSpeech( volcanic_api_url=model_credential.get('volcanic_api_url'), volcanic_token=model_credential.get('volcanic_token'), @@ -82,9 +86,9 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, 
BaseTextToSpeech): "uid": "uid" }, "audio": { - "voice_type": "BV002_streaming", + "voice_type": self.voice_type, "encoding": "mp3", - "speed_ratio": 1.0, + "speed_ratio": self.speed_ratio, "volume_ratio": 1.0, "pitch_ratio": 1.0, }, diff --git a/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py b/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py index f0e68b38b..ec9478aae 100644 --- a/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py +++ b/apps/setting/models_provider/impl/xf_model_provider/credential/tts.py @@ -12,7 +12,7 @@ class XunFeiTTSModelGeneralParams(BaseForm): vcn = forms.SingleSelect( TooltipLabel('发音人', '发音人,可选值:请到控制台添加试用或购买发音人,添加后即显示发音人参数值'), required=True, default_value='xiaoyan', - text_field='text', + text_field='value', value_field='value', option_list=[ {'text': '讯飞小燕', 'value': 'xiaoyan'}, diff --git a/apps/setting/models_provider/impl/xf_model_provider/model/tts.py b/apps/setting/models_provider/impl/xf_model_provider/model/tts.py index 004b78858..d6635c5dc 100644 --- a/apps/setting/models_provider/impl/xf_model_provider/model/tts.py +++ b/apps/setting/models_provider/impl/xf_model_provider/model/tts.py @@ -37,6 +37,8 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): spark_api_key: str spark_api_secret: str spark_api_url: str + speed: int + vcn: str def __init__(self, **kwargs): super().__init__(**kwargs) @@ -44,14 +46,16 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): self.spark_app_id = kwargs.get('spark_app_id') self.spark_api_key = kwargs.get('spark_api_key') self.spark_api_secret = kwargs.get('spark_api_secret') + self.vcn = kwargs.get('vcn', 'xiaoyan') + self.speed = kwargs.get('speed', 50) @staticmethod def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): optional_params = {} - if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None: - optional_params['max_tokens'] = 
model_kwargs['max_tokens'] - if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None: - optional_params['temperature'] = model_kwargs['temperature'] + if 'vcn' in model_kwargs and model_kwargs['vcn'] is not None: + optional_params['vcn'] = model_kwargs['vcn'] + if 'speed' in model_kwargs and model_kwargs['speed'] is not None: + optional_params['speed'] = model_kwargs['speed'] return XFSparkTextToSpeech( spark_app_id=model_credential.get('spark_app_id'), spark_api_key=model_credential.get('spark_api_key'), @@ -134,7 +138,7 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): async def send(self, ws, text): d = { "common": {"app_id": self.spark_app_id}, - "business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"}, + "business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": self.vcn, "speed": self.speed, "tte": "utf8"}, "data": {"status": 2, "text": str(base64.b64encode(text.encode('utf-8')), "UTF8")}, } d = json.dumps(d) diff --git a/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py b/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py index d2844739f..0bf3daadd 100644 --- a/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py +++ b/apps/setting/models_provider/impl/xinference_model_provider/credential/tts.py @@ -12,7 +12,7 @@ class XInferenceTTSModelGeneralParams(BaseForm): voice = forms.SingleSelect( TooltipLabel('音色', ''), required=True, default_value='中文女', - text_field='text', + text_field='value', value_field='value', option_list=[ {'text': '中文女', 'value': '中文女'}, diff --git a/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py b/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py index 6e6e46aa5..bb8a3faed 100644 --- a/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py +++ 
b/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py @@ -16,20 +16,20 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): api_base: str api_key: str model: str + voice: str def __init__(self, **kwargs): super().__init__(**kwargs) self.api_key = kwargs.get('api_key') self.api_base = kwargs.get('api_base') self.model = kwargs.get('model') + self.voice = kwargs.get('voice', '中文女') @staticmethod def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): optional_params = {} - if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None: - optional_params['max_tokens'] = model_kwargs['max_tokens'] - if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None: - optional_params['temperature'] = model_kwargs['temperature'] + if 'voice' in model_kwargs and model_kwargs['voice'] is not None: + optional_params['voice'] = model_kwargs['voice'] return XInferenceTextToSpeech( model=model_name, api_base=model_credential.get('api_base'), @@ -54,7 +54,7 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): with client.audio.speech.with_streaming_response.create( model=self.model, - voice="中文女", + voice=self.voice, input=text, ) as response: return response.read() diff --git a/ui/src/api/type/application.ts b/ui/src/api/type/application.ts index 97be3b257..00ad179fe 100644 --- a/ui/src/api/type/application.ts +++ b/ui/src/api/type/application.ts @@ -15,6 +15,7 @@ interface ApplicationFormType { type?: string work_flow?: any model_params_setting?: any + tts_model_params_setting?: any stt_model_id?: string tts_model_id?: string stt_model_enable?: boolean diff --git a/ui/src/views/application/ApplicationSetting.vue b/ui/src/views/application/ApplicationSetting.vue index d5c1e714d..e65355a0a 100644 --- a/ui/src/views/application/ApplicationSetting.vue +++ b/ui/src/views/application/ApplicationSetting.vue @@ -395,7 +395,18 @@ + >() +const TTSModeParamSettingDialogRef = 
ref>() const ParamSettingDialogRef = ref>() const createModelRef = ref>() const selectProviderRef = ref>() @@ -685,6 +698,15 @@ const openAIParamSettingDialog = () => { AIModeParamSettingDialogRef.value?.open(model_id, id, applicationForm.value.model_params_setting) } +const openTTSParamSettingDialog = () => { + const model_id = applicationForm.value.tts_model_id + if (!model_id) { + MsgSuccess(t('请选择语音播放模型')) + return + } + TTSModeParamSettingDialogRef.value?.open(model_id, id, applicationForm.value.tts_model_params_setting) +} + const openParamSettingDialog = () => { ParamSettingDialogRef.value?.open(applicationForm.value) } @@ -697,6 +719,10 @@ function refreshForm(data: any) { applicationForm.value.model_params_setting = data } +function refreshTTSForm(data: any) { + applicationForm.value.tts_model_params_setting = data +} + const openCreateModel = (provider?: Provider) => { if (provider && provider.provider) { createModelRef.value?.open(provider) diff --git a/ui/src/workflow/nodes/base-node/index.vue b/ui/src/workflow/nodes/base-node/index.vue index 144a2ed2d..741093d54 100644 --- a/ui/src/workflow/nodes/base-node/index.vue +++ b/ui/src/workflow/nodes/base-node/index.vue @@ -176,7 +176,18 @@ @@ -246,6 +257,7 @@ +