From 6c2b2f6c17023e5ba36153fae8d522893a07e448 Mon Sep 17 00:00:00 2001
From: zhangzhanwei
Date: Tue, 16 Sep 2025 16:13:12 +0800
Subject: [PATCH] feat: STT model params

Expose per-model parameter forms for two speech-to-text providers:

- Aliyun BaiLian Omni STT: `CueWord`, the text prompt sent with the audio.
- Tencent STT: `EngSerViceType`, the recognition engine model.

Both models read the value from `self.params` and fall back to the value
that was hard-coded before this change when the parameter is missing or
empty.

---
NOTE(review): the two model/*.py hunks were amended to add the fallback
defaults, so the post-image blob ids on their `index` lines still refer to
the original commit. Indentation and blank context lines were restored from
a whitespace-collapsed copy; confirm against the tree (or apply with
`--ignore-whitespace`) before merging.

 .../credential/omni_stt.py                    | 14 ++--
 .../model/omni_stt.py                         |  2 +-
 .../tencent_model_provider/credential/stt.py  | 66 +++++++++----------
 .../impl/tencent_model_provider/model/stt.py  |  2 +-
 4 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/omni_stt.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/omni_stt.py
index 274e56678..0b704fbc4 100644
--- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/omni_stt.py
+++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/omni_stt.py
@@ -8,12 +8,12 @@ from common.forms import BaseForm, PasswordInputField, TooltipLabel
 from models_provider.base_model_provider import BaseModelCredential, ValidCode
 from django.utils.translation import gettext as _
 
-# class AliyunBaiLianOmiSTTModelParams(BaseForm):
-#     CueWord = forms.TextInputField(
-#         TooltipLabel(_('CueWord'), _('If not passed, the default value is What is this audio saying? Only answer the audio content')),
-#         required=True,
-#         default_value='这段音频在说什么,只回答音频的内容',
-#     )
+class AliyunBaiLianOmiSTTModelParams(BaseForm):
+    CueWord = forms.TextInputField(
+        TooltipLabel(_('CueWord'), _('If not passed, the default value is What is this audio saying? Only answer the audio content')),
+        required=True,
+        default_value='这段音频在说什么,只回答音频的内容',
+    )
 
 
 class AliyunBaiLianOmiSTTModelCredential(BaseForm, BaseModelCredential):
@@ -70,4 +70,4 @@ class AliyunBaiLianOmiSTTModelCredential(BaseForm, BaseModelCredential):
 
 
     def get_model_params_setting_form(self, model_name):
-        pass
\ No newline at end of file
+        return AliyunBaiLianOmiSTTModelParams()
\ No newline at end of file
diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py
index 23a060288..56e060f6b 100644
--- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py
+++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py
@@ -68,7 +68,7 @@ class AliyunBaiLianOmiSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                                     "format": "mp3",
                                 },
                             },
-                            {"type": "text", "text": '这段音频在说什么,只回答音频的内容'},
+                            {"type": "text", "text": self.params.get('CueWord') or '这段音频在说什么,只回答音频的内容'},
                         ],
                     },
                 ],
diff --git a/apps/models_provider/impl/tencent_model_provider/credential/stt.py b/apps/models_provider/impl/tencent_model_provider/credential/stt.py
index a055ccb1d..3eea500f2 100644
--- a/apps/models_provider/impl/tencent_model_provider/credential/stt.py
+++ b/apps/models_provider/impl/tencent_model_provider/credential/stt.py
@@ -8,38 +8,38 @@ from django.utils.translation import gettext_lazy as _, gettext
 from models_provider.base_model_provider import BaseModelCredential, ValidCode
 
 
-# class TencentSSTModelParams(BaseForm):
-#     EngSerViceType = forms.SingleSelect(
-#         TooltipLabel(_('Engine model type'), _('If not passed, the default value is 16k_zh (Chinese universal)')),
-#         required=True,
-#         default_value='16k_zh',
-#         option_list=[
-#             {"value": "8k_zh", "label": _("Chinese telephone universal")},
-#             {"value": "8k_en", "label": _("English telephone universal")},
-#             {"value": "16k_zh", "label": _("Commonly used in Chinese")},
-#             {"value": "16k_zh-PY", "label": _("Chinese, English, and Guangdong")},
-#             {"value": "16k_zh_medical", "label": _("Chinese medical")},
-#             {"value": "16k_en", "label": _("English")},
-#             {"value": "16k_yue", "label": _("Cantonese")},
-#             {"value": "16k_ja", "label": _("Japanese")},
-#             {"value": "16k_ko", "label": _("Korean")},
-#             {"value": "16k_vi", "label": _("Vietnamese")},
-#             {"value": "16k_ms", "label": _("Malay language")},
-#             {"value": "16k_id", "label": _("Indonesian language")},
-#             {"value": "16k_fil", "label": _("Filipino language")},
-#             {"value": "16k_th", "label": _("Thai")},
-#             {"value": "16k_pt", "label": _("Portuguese")},
-#             {"value": "16k_tr", "label": _("Turkish")},
-#             {"value": "16k_ar", "label": _("Arabic")},
-#             {"value": "16k_es", "label": _("Spanish")},
-#             {"value": "16k_hi", "label": _("Hindi")},
-#             {"value": "16k_fr", "label": _("French")},
-#             {"value": "16k_de", "label": _("German")},
-#             {"value": "16k_zh_dialect", "label": _("Multiple dialects, supporting 23 dialects")}
-#         ],
-#         value_field='value',
-#         text_field='label'
-#     )
+class TencentSSTModelParams(BaseForm):
+    EngSerViceType = forms.SingleSelect(
+        TooltipLabel(_('Engine model type'), _('If not passed, the default value is 16k_zh (Chinese universal)')),
+        required=True,
+        default_value='16k_zh',
+        option_list=[
+            {"value": "8k_zh", "label": _("Chinese telephone universal")},
+            {"value": "8k_en", "label": _("English telephone universal")},
+            {"value": "16k_zh", "label": _("Commonly used in Chinese")},
+            {"value": "16k_zh-PY", "label": _("Chinese, English, and Guangdong")},
+            {"value": "16k_zh_medical", "label": _("Chinese medical")},
+            {"value": "16k_en", "label": _("English")},
+            {"value": "16k_yue", "label": _("Cantonese")},
+            {"value": "16k_ja", "label": _("Japanese")},
+            {"value": "16k_ko", "label": _("Korean")},
+            {"value": "16k_vi", "label": _("Vietnamese")},
+            {"value": "16k_ms", "label": _("Malay language")},
+            {"value": "16k_id", "label": _("Indonesian language")},
+            {"value": "16k_fil", "label": _("Filipino language")},
+            {"value": "16k_th", "label": _("Thai")},
+            {"value": "16k_pt", "label": _("Portuguese")},
+            {"value": "16k_tr", "label": _("Turkish")},
+            {"value": "16k_ar", "label": _("Arabic")},
+            {"value": "16k_es", "label": _("Spanish")},
+            {"value": "16k_hi", "label": _("Hindi")},
+            {"value": "16k_fr", "label": _("French")},
+            {"value": "16k_de", "label": _("German")},
+            {"value": "16k_zh_dialect", "label": _("Multiple dialects, supporting 23 dialects")}
+        ],
+        value_field='value',
+        text_field='label'
+    )
 
 class TencentSTTModelCredential(BaseForm, BaseModelCredential):
     REQUIRED_FIELDS = ["SecretId", "SecretKey"]
@@ -87,4 +87,4 @@ class TencentSTTModelCredential(BaseForm, BaseModelCredential):
     SecretKey = forms.PasswordInputField('SecretKey', required=True)
 
     def get_model_params_setting_form(self, model_name):
-        pass
+        return TencentSSTModelParams()
diff --git a/apps/models_provider/impl/tencent_model_provider/model/stt.py b/apps/models_provider/impl/tencent_model_provider/model/stt.py
index 87a1a05e4..a501fed19 100644
--- a/apps/models_provider/impl/tencent_model_provider/model/stt.py
+++ b/apps/models_provider/impl/tencent_model_provider/model/stt.py
@@ -65,7 +65,7 @@ class TencentSpeechToText(MaxKBBaseModel, BaseSpeechToText):
             # 实例化一个请求对象,每个接口都会对应一个request对象
             req = models.SentenceRecognitionRequest()
             params = {
-                "EngSerViceType": '16k_zh',
+                "EngSerViceType": self.params.get('EngSerViceType') or '16k_zh',
                 "SourceType": 1,
                 "VoiceFormat": "mp3",
                 "Data": _v.decode(),