diff --git a/apps/models_provider/impl/vllm_model_provider/credential/whisper_stt.py b/apps/models_provider/impl/vllm_model_provider/credential/whisper_stt.py index f65b38eac..5844d0a4d 100644 --- a/apps/models_provider/impl/vllm_model_provider/credential/whisper_stt.py +++ b/apps/models_provider/impl/vllm_model_provider/credential/whisper_stt.py @@ -13,7 +13,7 @@ from models_provider.base_model_provider import BaseModelCredential, ValidCode class VLLMWhisperModelParams(BaseForm): Language = forms.TextInputField( - TooltipLabel(_('Language'), + TooltipLabel(_('language'), _("If not passed, the default value is 'zh'")), required=True, default_value='zh', diff --git a/apps/models_provider/impl/vllm_model_provider/model/whisper_sst.py b/apps/models_provider/impl/vllm_model_provider/model/whisper_sst.py index 922d934a8..ca502c4b0 100644 --- a/apps/models_provider/impl/vllm_model_provider/model/whisper_sst.py +++ b/apps/models_provider/impl/vllm_model_provider/model/whisper_sst.py @@ -52,11 +52,11 @@ class VllmWhisperSpeechToText(MaxKBBaseModel, BaseSpeechToText): api_key=self.api_key, base_url=base_url ) - + buf = audio_file.read() filter_params = {k: v for k, v in self.params.items() if k not in {'model_id', 'use_local', 'streaming'}} transcription_params = { 'model': self.model, - 'file': audio_file, + 'file': buf, 'language': 'zh', } result = client.audio.transcriptions.create(