fix: fix TTS node and STT node errors

--bug=1050817 --user=王孝刚【应用编排】文本转语音，字数不是很多但是模型提示超长了 https://www.tapd.cn/57709429/s/1636787 --bug=1050821 --user=王孝刚【应用编排】语音转文本错误信息是图片缺少file_id https://www.tapd.cn/57709429/s/1636786
2025-12-26 01:33:05 +00:00 · 2024-12-25 15:14:32 +08:00 · 2024-12-25 15:14:32 +08:00 · bd8d848321
parent ae7c446e81
commit bd8d848321
3 changed files with 9 additions and 7 deletions
--- a/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py
+++ b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py
@ -27,7 +27,7 @@ class ISpeechToTextNode(INode):
                                                       self.node_params_serializer.data.get('audio_list')[1:])
        for audio in res:
            if 'file_id' not in audio:
-                raise ValueError("参数值错误: 上传的图片中缺少file_id，音频上传失败")
+                raise ValueError("参数值错误: 上传的音频中缺少file_id，音频上传失败")

        return self.execute(audio=res, **self.node_params_serializer.data, **self.flow_params_serializer.data)

--- a/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py
+++ b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py
@ -58,7 +58,9 @@ class BaseTextToSpeechNode(ITextToSpeechNode):
        file_url = FileSerializer(data={'file': file, 'meta': meta}).upload()
        # 拼接一个audio标签的src属性
        audio_label = f'<audio src="{file_url}" controls style = "width: 300px; height: 43px"></audio>'
-        return NodeResult({'answer': audio_label, 'result': audio_label}, {})
+        file_id = file_url.split('/')[-1]
+        audio_list = [{'file_id': file_id, 'file_name': file_name, 'url': file_url}]
+        return NodeResult({'answer': audio_label, 'result': audio_list}, {})

    def get_details(self, index: int, **kwargs):
        return {
--- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py
@ -85,10 +85,10 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
                "uid": "uid"
            },
            "audio": {
-                "encoding": "mp3",
-                "volume_ratio": 1.0,
-                "pitch_ratio": 1.0,
-            } | self.params,
+                         "encoding": "mp3",
+                         "volume_ratio": 1.0,
+                         "pitch_ratio": 1.0,
+                     } | self.params,
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": '',
@ -113,7 +113,7 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
        result = b''
        async with websockets.connect(self.volcanic_api_url, extra_headers=header, ping_interval=None,
                                      ssl=ssl_context) as ws:
-            lines = text.split('\n')
+            lines = [text[i:i + 200] for i in range(0, len(text), 200)]
            for line in lines:
                if self.is_table_format_chars_only(line):
                    continue