From bd8d848321556f4d27c7774ac1d2e52d2252aa2d Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Wed, 25 Dec 2024 15:14:32 +0800 Subject: [PATCH] fix: fix tts node and stt node error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1050817 --user=王孝刚 【应用编排】文本转语音,字数不是很多但是模型提示超长了 https://www.tapd.cn/57709429/s/1636787 --bug=1050821 --user=王孝刚 【应用编排】语音转文本错误信息是图片缺少file_id https://www.tapd.cn/57709429/s/1636786 --- .../speech_to_text_step_node/i_speech_to_text_node.py | 2 +- .../impl/base_text_to_speech_node.py | 4 +++- .../impl/volcanic_engine_model_provider/model/tts.py | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py index 4753768b9..1df3d273d 100644 --- a/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py +++ b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py @@ -27,7 +27,7 @@ class ISpeechToTextNode(INode): self.node_params_serializer.data.get('audio_list')[1:]) for audio in res: if 'file_id' not in audio: - raise ValueError("参数值错误: 上传的图片中缺少file_id,音频上传失败") + raise ValueError("参数值错误: 上传的音频中缺少file_id,音频上传失败") return self.execute(audio=res, **self.node_params_serializer.data, **self.flow_params_serializer.data) diff --git a/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py index 9a17ad6e2..72c4d3be5 100644 --- a/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py +++ b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py @@ -58,7 +58,9 @@ class BaseTextToSpeechNode(ITextToSpeechNode): file_url = FileSerializer(data={'file': file, 'meta': meta}).upload() # 拼接一个audio标签的src属性 audio_label = f'' - return NodeResult({'answer': audio_label, 'result': audio_label}, {}) + file_id = file_url.split('/')[-1] + audio_list = [{'file_id': file_id, 'file_name': file_name, 'url': file_url}] + return NodeResult({'answer': audio_label, 'result': audio_list}, {}) def get_details(self, index: int, **kwargs): return { diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py index 8ef806474..acb755bb3 100644 --- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py +++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py @@ -85,10 +85,10 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): "uid": "uid" }, "audio": { - "encoding": "mp3", - "volume_ratio": 1.0, - "pitch_ratio": 1.0, - } | self.params, + "encoding": "mp3", + "volume_ratio": 1.0, + "pitch_ratio": 1.0, + } | self.params, "request": { "reqid": str(uuid.uuid4()), "text": '', @@ -113,7 +113,7 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): result = b'' async with websockets.connect(self.volcanic_api_url, extra_headers=header, ping_interval=None, ssl=ssl_context) as ws: - lines = text.split('\n') + lines = [text[i:i + 200] for i in range(0, len(text), 200)] for line in lines: if self.is_table_format_chars_only(line): continue