diff --git a/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py index 4753768b9..1df3d273d 100644 --- a/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py +++ b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py @@ -27,7 +27,7 @@ class ISpeechToTextNode(INode): self.node_params_serializer.data.get('audio_list')[1:]) for audio in res: if 'file_id' not in audio: - raise ValueError("参数值错误: 上传的图片中缺少file_id,音频上传失败") + raise ValueError("参数值错误: 上传的音频中缺少file_id,音频上传失败") return self.execute(audio=res, **self.node_params_serializer.data, **self.flow_params_serializer.data) diff --git a/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py index 9a17ad6e2..72c4d3be5 100644 --- a/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py +++ b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py @@ -58,7 +58,9 @@ class BaseTextToSpeechNode(ITextToSpeechNode): file_url = FileSerializer(data={'file': file, 'meta': meta}).upload() # 拼接一个audio标签的src属性 audio_label = f'' - return NodeResult({'answer': audio_label, 'result': audio_label}, {}) + file_id = file_url.split('/')[-1] + audio_list = [{'file_id': file_id, 'file_name': file_name, 'url': file_url}] + return NodeResult({'answer': audio_label, 'result': audio_list}, {}) def get_details(self, index: int, **kwargs): return { diff --git a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py index 8ef806474..acb755bb3 100644 --- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py +++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tts.py @@ -85,10 +85,10 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): "uid": "uid" }, "audio": { - "encoding": "mp3", - "volume_ratio": 1.0, - "pitch_ratio": 1.0, - } | self.params, + "encoding": "mp3", + "volume_ratio": 1.0, + "pitch_ratio": 1.0, + } | self.params, "request": { "reqid": str(uuid.uuid4()), "text": '', @@ -113,7 +113,7 @@ class VolcanicEngineTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): result = b'' async with websockets.connect(self.volcanic_api_url, extra_headers=header, ping_interval=None, ssl=ssl_context) as ws: - lines = text.split('\n') + lines = [text[i:i + 200] for i in range(0, len(text), 200)] for line in lines: if self.is_table_format_chars_only(line): continue