fix: fix Bailian STT by converting audio to mono 16 kHz MP3 before recognition

2025-12-26 01:33:05 +00:00 · 2024-12-23 11:25:00 +08:00 · 2024-12-23 11:25:00 +08:00 · 53f4e11145
parent 1113e1ffec
commit 53f4e11145
1 changed file with 8 additions and 1 deletion
--- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/stt.py
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/stt.py
@@ -4,6 +4,7 @@ from typing import Dict

 import dashscope
 from dashscope.audio.asr import (Recognition)
+from pydub import AudioSegment

 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_stt import BaseSpeechToText
@@ -40,7 +41,6 @@ class AliyunBaiLianSpeechToText(MaxKBBaseModel, BaseSpeechToText):
        dashscope.api_key = self.api_key
        recognition = Recognition(model=self.model,
                                  format='mp3',
-                                  sample_rate=16000,
                                  callback=None)
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            # 将上传的文件保存到临时文件中
@@ -49,6 +49,13 @@ class AliyunBaiLianSpeechToText(MaxKBBaseModel, BaseSpeechToText):
            temp_file_path = temp_file.name

        try:
+            audio = AudioSegment.from_file(temp_file_path)
+            if audio.channels != 1:
+                audio = audio.set_channels(1)
+            audio = audio.set_frame_rate(16000)
+
+            # 将转换后的音频文件保存到临时文件中
+            audio.export(temp_file_path, format='mp3')
            # 识别临时文件
            result = recognition.call(temp_file_path)
            text = ''