diff --git a/apps/application/flow/step_node/__init__.py b/apps/application/flow/step_node/__init__.py
index 535560b5f..5e992079b 100644
--- a/apps/application/flow/step_node/__init__.py
+++ b/apps/application/flow/step_node/__init__.py
@@ -21,12 +21,14 @@ from .image_understand_step_node import *
from .image_generate_step_node import *
from .search_dataset_node import *
+from .speech_to_text_step_node import BaseSpeechToTextNode
from .start_node import *
+from .text_to_speech_step_node.impl.base_text_to_speech_node import BaseTextToSpeechNode
node_list = [BaseStartStepNode, BaseChatNode, BaseSearchDatasetNode, BaseQuestionNode, BaseConditionNode, BaseReplyNode,
BaseFunctionNodeNode, BaseFunctionLibNodeNode, BaseRerankerNode, BaseApplicationNode,
BaseDocumentExtractNode,
- BaseImageUnderstandNode, BaseImageGenerateNode, BaseFormNode]
+ BaseImageUnderstandNode, BaseFormNode, BaseSpeechToTextNode, BaseTextToSpeechNode,BaseImageGenerateNode]
def get_node(node_type):
diff --git a/apps/application/flow/step_node/application_node/i_application_node.py b/apps/application/flow/step_node/application_node/i_application_node.py
index 8c4675ea7..c0fb158fd 100644
--- a/apps/application/flow/step_node/application_node/i_application_node.py
+++ b/apps/application/flow/step_node/application_node/i_application_node.py
@@ -14,6 +14,7 @@ class ApplicationNodeSerializer(serializers.Serializer):
user_input_field_list = serializers.ListField(required=False, error_messages=ErrMessage.uuid("用户输入字段"))
image_list = serializers.ListField(required=False, error_messages=ErrMessage.list("图片"))
document_list = serializers.ListField(required=False, error_messages=ErrMessage.list("文档"))
+ audio_list = serializers.ListField(required=False, error_messages=ErrMessage.list("音频"))
child_node = serializers.DictField(required=False, allow_null=True, error_messages=ErrMessage.dict("子节点"))
node_data = serializers.DictField(required=False, allow_null=True, error_messages=ErrMessage.dict("表单数据"))
@@ -43,7 +44,7 @@ class IApplicationNode(INode):
app_document_list[1:])
for document in app_document_list:
if 'file_id' not in document:
- raise ValueError("参数值错误: 上传的文档中缺少file_id")
+ raise ValueError("参数值错误: 上传的文档中缺少file_id,文档上传失败")
app_image_list = self.node_params_serializer.data.get('image_list', [])
if app_image_list and len(app_image_list) > 0:
app_image_list = self.workflow_manage.get_reference_field(
@@ -51,11 +52,22 @@ class IApplicationNode(INode):
app_image_list[1:])
for image in app_image_list:
if 'file_id' not in image:
- raise ValueError("参数值错误: 上传的图片中缺少file_id")
+ raise ValueError("参数值错误: 上传的图片中缺少file_id,图片上传失败")
+
+        # Resolve the optional audio reference configured on the node and make sure
+        # every resolved entry carries the file_id needed to load the upload later.
+        app_audio_list = self.node_params_serializer.data.get('audio_list', [])
+        if app_audio_list and len(app_audio_list) > 0:
+            app_audio_list = self.workflow_manage.get_reference_field(
+                app_audio_list[0],
+                app_audio_list[1:])
+            for audio in app_audio_list:
+                if 'file_id' not in audio:
+                    # The message names audio (not image) so the failing input is identifiable.
+                    raise ValueError("参数值错误: 上传的音频中缺少file_id,音频上传失败")
return self.execute(**self.node_params_serializer.data, **self.flow_params_serializer.data,
app_document_list=app_document_list, app_image_list=app_image_list,
+ app_audio_list=app_audio_list,
message=str(question), **kwargs)
def execute(self, application_id, message, chat_id, chat_record_id, stream, re_chat, client_id, client_type,
- app_document_list=None, app_image_list=None, child_node=None, node_data=None, **kwargs) -> NodeResult:
+ app_document_list=None, app_image_list=None, app_audio_list=None, child_node=None, node_data=None,
+ **kwargs) -> NodeResult:
pass
diff --git a/apps/application/flow/step_node/application_node/impl/base_application_node.py b/apps/application/flow/step_node/application_node/impl/base_application_node.py
index 76f92f878..c6bb29be5 100644
--- a/apps/application/flow/step_node/application_node/impl/base_application_node.py
+++ b/apps/application/flow/step_node/application_node/impl/base_application_node.py
@@ -154,7 +154,7 @@ class BaseApplicationNode(IApplicationNode):
self.answer_text = details.get('answer')
def execute(self, application_id, message, chat_id, chat_record_id, stream, re_chat, client_id, client_type,
- app_document_list=None, app_image_list=None, child_node=None, node_data=None,
+ app_document_list=None, app_image_list=None, app_audio_list=None, child_node=None, node_data=None,
**kwargs) -> NodeResult:
from application.serializers.chat_message_serializers import ChatMessageSerializer
# 生成嵌入应用的chat_id
@@ -167,6 +167,8 @@ class BaseApplicationNode(IApplicationNode):
app_document_list = []
if app_image_list is None:
app_image_list = []
+ if app_audio_list is None:
+ app_audio_list = []
runtime_node_id = None
record_id = None
child_node_value = None
@@ -186,6 +188,7 @@ class BaseApplicationNode(IApplicationNode):
'client_type': client_type,
'document_list': app_document_list,
'image_list': app_image_list,
+ 'audio_list': app_audio_list,
'runtime_node_id': runtime_node_id,
'chat_record_id': record_id,
'child_node': child_node_value,
@@ -234,5 +237,6 @@ class BaseApplicationNode(IApplicationNode):
'global_fields': global_fields,
'document_list': self.workflow_manage.document_list,
'image_list': self.workflow_manage.image_list,
+ 'audio_list': self.workflow_manage.audio_list,
'application_node_dict': self.context.get('application_node_dict')
}
diff --git a/apps/application/flow/step_node/speech_to_text_step_node/__init__.py b/apps/application/flow/step_node/speech_to_text_step_node/__init__.py
new file mode 100644
index 000000000..f3feecc9c
--- /dev/null
+++ b/apps/application/flow/step_node/speech_to_text_step_node/__init__.py
@@ -0,0 +1,3 @@
+# coding=utf-8
+
+from .impl import *
diff --git a/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py
new file mode 100644
index 000000000..7e2a79b56
--- /dev/null
+++ b/apps/application/flow/step_node/speech_to_text_step_node/i_speech_to_text_node.py
@@ -0,0 +1,37 @@
+# coding=utf-8
+
+from typing import Type
+
+from rest_framework import serializers
+
+from application.flow.i_step_node import INode, NodeResult
+from common.util.field_message import ErrMessage
+
+
+class SpeechToTextNodeSerializer(serializers.Serializer):
+    """Validates the parameters configured on a speech-to-text workflow node."""
+
+    # Mandatory identifier of the speech-to-text model.
+    stt_model_id = serializers.CharField(
+        required=True,
+        error_messages=ErrMessage.char("模型id"),
+    )
+
+    # Optional boolean flag (error label: "whether to return content").
+    is_result = serializers.BooleanField(
+        required=False,
+        error_messages=ErrMessage.boolean('是否返回内容'),
+    )
+
+    # Optional list describing the audio input of the node.
+    audio_list = serializers.ListField(
+        required=False,
+        error_messages=ErrMessage.list("音频"),
+    )
+
+
+class ISpeechToTextNode(INode):
+    """Interface of the workflow node that turns referenced audio files into text."""
+    type = 'speech-to-text-node'
+
+    def get_node_params_serializer_class(self) -> Type[serializers.Serializer]:
+        """Return the serializer class used to validate this node's parameters."""
+        return SpeechToTextNodeSerializer
+
+    def _run(self):
+        """
+        Resolve the configured audio reference, validate it and delegate to ``execute``.
+
+        Raises:
+            ValueError: when no audio reference is configured, or when a resolved
+                audio entry does not contain a ``file_id``.
+        """
+        audio_reference = self.node_params_serializer.data.get('audio_list')
+        # audio_list is optional in the serializer, so fail with a clear message
+        # instead of a TypeError from indexing None / an empty list.
+        if not audio_reference:
+            raise ValueError("参数值错误: 未设置音频文件")
+        res = self.workflow_manage.get_reference_field(audio_reference[0], audio_reference[1:])
+        for audio in res:
+            if 'file_id' not in audio:
+                raise ValueError("参数值错误: 上传的音频中缺少file_id,音频上传失败")
+
+        return self.execute(audio=res, **self.node_params_serializer.data, **self.flow_params_serializer.data)
+
+    def execute(self, stt_model_id, chat_id,
+                audio,
+                **kwargs) -> NodeResult:
+        """Transcribe ``audio``; implemented by concrete node classes."""
+        pass
diff --git a/apps/application/flow/step_node/speech_to_text_step_node/impl/__init__.py b/apps/application/flow/step_node/speech_to_text_step_node/impl/__init__.py
new file mode 100644
index 000000000..9d2da6158
--- /dev/null
+++ b/apps/application/flow/step_node/speech_to_text_step_node/impl/__init__.py
@@ -0,0 +1,3 @@
+# coding=utf-8
+
+from .base_speech_to_text_node import BaseSpeechToTextNode
diff --git a/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py b/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py
new file mode 100644
index 000000000..ed2ca9a68
--- /dev/null
+++ b/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+import os
+import tempfile
+import time
+import io
+from typing import List, Dict
+
+from django.db.models import QuerySet
+from pydub import AudioSegment
+from concurrent.futures import ThreadPoolExecutor
+from application.flow.i_step_node import NodeResult, INode
+from application.flow.step_node.speech_to_text_step_node.i_speech_to_text_node import ISpeechToTextNode
+from common.util.common import split_and_transcribe
+from dataset.models import File
+from setting.models_provider.tools import get_model_instance_by_model_user_id
+
+
+class BaseSpeechToTextNode(ISpeechToTextNode):
+    """Speech-to-text node: transcribes every referenced audio file with the configured model."""
+
+    def save_context(self, details, workflow_manage):
+        """Restore the node's answer from previously recorded execution details."""
+        self.context['answer'] = details.get('answer')
+        self.answer_text = details.get('answer')
+
+    def execute(self, stt_model_id, chat_id, audio, **kwargs) -> NodeResult:
+        """
+        Transcribe each audio entry and join the transcriptions with blank lines.
+
+        Args:
+            stt_model_id: id of the speech-to-text model to load.
+            chat_id: id of the current chat (not used by this implementation).
+            audio: iterable of dicts, each holding the ``file_id`` of a stored file.
+
+        Returns:
+            NodeResult whose ``answer`` and ``result`` are the joined transcription.
+
+        Raises:
+            ValueError: when a referenced file cannot be found.
+        """
+        stt_model = get_model_instance_by_model_user_id(stt_model_id, self.flow_params_serializer.data.get('user_id'))
+        self.context['audio_list'] = audio
+
+        def process_audio_item(audio_item):
+            file = QuerySet(File).filter(id=audio_item['file_id']).first()
+            if file is None:
+                # Fail with a readable message instead of an AttributeError on None.
+                raise ValueError("参数值错误: 音频文件不存在")
+            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
+            try:
+                # Close the handle before transcription so the path can be reopened.
+                with temp_file:
+                    temp_file.write(file.get_byte().tobytes())
+                return split_and_transcribe(temp_file.name, stt_model)
+            finally:
+                # Runs even when the write fails, so the temporary file never leaks.
+                os.remove(temp_file.name)
+
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            # executor.map keeps the transcriptions in the order of the input list.
+            results = list(executor.map(process_audio_item, audio))
+        # Skip non-text results so a failed transcription cannot break the join.
+        result = '\n\n'.join(text for text in results if isinstance(text, str))
+        return NodeResult({'answer': result, 'result': result}, {})
+
+    def get_details(self, index: int, **kwargs):
+        """Return the execution record of this node for position ``index``."""
+        return {
+            'name': self.node.properties.get('stepName'),
+            "index": index,
+            'run_time': self.context.get('run_time'),
+            'answer': self.context.get('answer'),
+            'type': self.node.type,
+            'status': self.status,
+            'err_message': self.err_message,
+            'audio_list': self.context.get('audio_list'),
+        }
diff --git a/apps/application/flow/step_node/start_node/impl/base_start_node.py b/apps/application/flow/step_node/start_node/impl/base_start_node.py
index 59f875fcc..bf5203274 100644
--- a/apps/application/flow/step_node/start_node/impl/base_start_node.py
+++ b/apps/application/flow/step_node/start_node/impl/base_start_node.py
@@ -39,6 +39,7 @@ class BaseStartStepNode(IStarNode):
self.context['run_time'] = details.get('run_time')
self.context['document'] = details.get('document_list')
self.context['image'] = details.get('image_list')
+ self.context['audio'] = details.get('audio_list')
self.status = details.get('status')
self.err_message = details.get('err_message')
for key, value in workflow_variable.items():
@@ -57,7 +58,8 @@ class BaseStartStepNode(IStarNode):
node_variable = {
'question': question,
'image': self.workflow_manage.image_list,
- 'document': self.workflow_manage.document_list
+ 'document': self.workflow_manage.document_list,
+ 'audio': self.workflow_manage.audio_list
}
return NodeResult(node_variable, workflow_variable)
@@ -80,5 +82,6 @@ class BaseStartStepNode(IStarNode):
'err_message': self.err_message,
'image_list': self.context.get('image'),
'document_list': self.context.get('document'),
+ 'audio_list': self.context.get('audio'),
'global_fields': global_fields
}
diff --git a/apps/application/flow/step_node/text_to_speech_step_node/__init__.py b/apps/application/flow/step_node/text_to_speech_step_node/__init__.py
new file mode 100644
index 000000000..f3feecc9c
--- /dev/null
+++ b/apps/application/flow/step_node/text_to_speech_step_node/__init__.py
@@ -0,0 +1,3 @@
+# coding=utf-8
+
+from .impl import *
diff --git a/apps/application/flow/step_node/text_to_speech_step_node/i_text_to_speech_node.py b/apps/application/flow/step_node/text_to_speech_step_node/i_text_to_speech_node.py
new file mode 100644
index 000000000..8b16301a5
--- /dev/null
+++ b/apps/application/flow/step_node/text_to_speech_step_node/i_text_to_speech_node.py
@@ -0,0 +1,35 @@
+# coding=utf-8
+
+from typing import Type
+
+from rest_framework import serializers
+
+from application.flow.i_step_node import INode, NodeResult
+from common.util.field_message import ErrMessage
+
+
+class TextToSpeechNodeSerializer(serializers.Serializer):
+    """Validates the parameters configured on a text-to-speech workflow node."""
+
+    # Mandatory identifier of the text-to-speech model.
+    tts_model_id = serializers.CharField(required=True, error_messages=ErrMessage.char("模型id"))
+
+    # Optional boolean flag (error label: "whether to return content").
+    is_result = serializers.BooleanField(required=False, error_messages=ErrMessage.boolean('是否返回内容'))
+
+    # Optional list describing the text input of the node.
+    content_list = serializers.ListField(required=False, error_messages=ErrMessage.list("文本内容"))
+    # Optional model settings; a DictField takes the dict error messages, not the integer ones.
+    model_params_setting = serializers.DictField(required=False,
+                                                 error_messages=ErrMessage.dict("模型参数相关设置"))
+
+
+class ITextToSpeechNode(INode):
+    """Interface of the workflow node that turns referenced text into speech."""
+    type = 'text-to-speech-node'
+
+    def get_node_params_serializer_class(self) -> Type[serializers.Serializer]:
+        """Return the serializer class used to validate this node's parameters."""
+        return TextToSpeechNodeSerializer
+
+    def _run(self):
+        """
+        Resolve the configured text reference and delegate to ``execute``.
+
+        Raises:
+            ValueError: when no text reference is configured on the node.
+        """
+        content_reference = self.node_params_serializer.data.get('content_list')
+        # content_list is optional in the serializer, so fail with a clear message
+        # instead of a TypeError from indexing None / an empty list.
+        if not content_reference:
+            raise ValueError("参数值错误: 未设置文本内容")
+        content = self.workflow_manage.get_reference_field(content_reference[0], content_reference[1:])
+        return self.execute(content=content, **self.node_params_serializer.data, **self.flow_params_serializer.data)
+
+    def execute(self, tts_model_id, chat_id,
+                content, model_params_setting=None,
+                **kwargs) -> NodeResult:
+        """Synthesize ``content``; implemented by concrete node classes."""
+        pass
diff --git a/apps/application/flow/step_node/text_to_speech_step_node/impl/__init__.py b/apps/application/flow/step_node/text_to_speech_step_node/impl/__init__.py
new file mode 100644
index 000000000..385b9718f
--- /dev/null
+++ b/apps/application/flow/step_node/text_to_speech_step_node/impl/__init__.py
@@ -0,0 +1,3 @@
+# coding=utf-8
+
+from .base_text_to_speech_node import BaseTextToSpeechNode
diff --git a/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py
new file mode 100644
index 000000000..9bd36fc22
--- /dev/null
+++ b/apps/application/flow/step_node/text_to_speech_step_node/impl/base_text_to_speech_node.py
@@ -0,0 +1,73 @@
+# coding=utf-8
+import io
+import mimetypes
+
+from django.core.files.uploadedfile import InMemoryUploadedFile
+
+from application.flow.i_step_node import NodeResult, INode
+from application.flow.step_node.image_understand_step_node.i_image_understand_node import IImageUnderstandNode
+from application.flow.step_node.text_to_speech_step_node.i_text_to_speech_node import ITextToSpeechNode
+from dataset.models import File
+from dataset.serializers.file_serializers import FileSerializer
+from setting.models_provider.tools import get_model_instance_by_model_user_id
+
+
+def bytes_to_uploaded_file(file_bytes, file_name="generated_audio.mp3"):
+    """
+    Wrap raw bytes in an ``InMemoryUploadedFile``.
+
+    The content type is guessed from ``file_name``; the size is the length of
+    ``file_bytes``.
+    """
+    guessed_type = mimetypes.guess_type(file_name)[0]
+    # Fall back to the generic binary type when the name gives no recognisable type.
+    mime_type = "application/octet-stream" if guessed_type is None else guessed_type
+    return InMemoryUploadedFile(
+        file=io.BytesIO(file_bytes),  # in-memory byte stream holding the payload
+        field_name=None,
+        name=file_name,
+        content_type=mime_type,
+        size=len(file_bytes),
+        charset=None,
+    )
+
+
+class BaseTextToSpeechNode(ITextToSpeechNode):
+    """Text-to-speech node: synthesizes audio, stores it as a file and answers with an audio tag."""
+
+    def save_context(self, details, workflow_manage):
+        """Restore the node's answer from previously recorded execution details."""
+        self.context['answer'] = details.get('answer')
+        self.answer_text = details.get('answer')
+
+    def execute(self, tts_model_id, chat_id,
+                content, model_params_setting=None,
+                **kwargs) -> NodeResult:
+        """
+        Convert ``content`` to speech and upload the generated audio.
+
+        Args:
+            tts_model_id: id of the text-to-speech model to load.
+            chat_id: id of the current chat, stored in the uploaded file's meta.
+            content: text to synthesize.
+            model_params_setting: optional dict of extra keyword arguments for
+                loading the model; ``None`` means no extra arguments.
+
+        Returns:
+            NodeResult whose ``answer`` and ``result`` are an ``<audio>`` tag that
+            points at the uploaded file.
+        """
+        self.context['content'] = content
+        # The parameter defaults to None, which cannot be unpacked with ** directly.
+        model = get_model_instance_by_model_user_id(tts_model_id, self.flow_params_serializer.data.get('user_id'),
+                                                    **(model_params_setting or {}))
+        audio_byte = model.text_to_speech(content)
+        # Persist the generated audio through the file serializer.
+        file = bytes_to_uploaded_file(audio_byte, 'generated_audio.mp3')
+        application = self.workflow_manage.work_flow_post_handler.chat_info.application
+        meta = {
+            # An application without an id is treated as a debug run.
+            'debug': not application.id,
+            'chat_id': chat_id,
+            'application_id': str(application.id) if application.id else None,
+        }
+        file_url = FileSerializer(data={'file': file, 'meta': meta}).upload()
+        # Build the audio tag whose src is the uploaded file, so the answer is playable.
+        # NOTE(review): the inline size below is a presentation choice - confirm with the UI.
+        audio_label = f'<audio src="{file_url}" controls style="width: 300px; height: 43px"></audio>'
+        return NodeResult({'answer': audio_label, 'result': audio_label}, {})
+
+    def get_details(self, index: int, **kwargs):
+        """Return the execution record of this node for position ``index``."""
+        return {
+            'name': self.node.properties.get('stepName'),
+            "index": index,
+            'run_time': self.context.get('run_time'),
+            'type': self.node.type,
+            'status': self.status,
+            'content': self.context.get('content'),
+            'err_message': self.err_message,
+            'answer': self.context.get('answer'),
+        }
diff --git a/apps/application/flow/workflow_manage.py b/apps/application/flow/workflow_manage.py
index 4a8e0b922..02397ec42 100644
--- a/apps/application/flow/workflow_manage.py
+++ b/apps/application/flow/workflow_manage.py
@@ -54,7 +54,7 @@ class Node:
end_nodes = ['ai-chat-node', 'reply-node', 'function-node', 'function-lib-node', 'application-node',
- 'image-understand-node', 'image-generate-node']
+ 'image-understand-node', 'speech-to-text-node', 'text-to-speech-node', 'image-generate-node']
class Flow:
@@ -244,6 +244,7 @@ class WorkflowManage:
def __init__(self, flow: Flow, params, work_flow_post_handler: WorkFlowPostHandler,
base_to_response: BaseToResponse = SystemToResponse(), form_data=None, image_list=None,
document_list=None,
+ audio_list=None,
start_node_id=None,
start_node_data=None, chat_record=None, child_node=None):
if form_data is None:
@@ -252,11 +253,14 @@ class WorkflowManage:
image_list = []
if document_list is None:
document_list = []
+ if audio_list is None:
+ audio_list = []
self.start_node_id = start_node_id
self.start_node = None
self.form_data = form_data
self.image_list = image_list
self.document_list = document_list
+ self.audio_list = audio_list
self.params = params
self.flow = flow
self.lock = threading.Lock()
diff --git a/apps/application/serializers/chat_message_serializers.py b/apps/application/serializers/chat_message_serializers.py
index 84e8376e0..3e779c965 100644
--- a/apps/application/serializers/chat_message_serializers.py
+++ b/apps/application/serializers/chat_message_serializers.py
@@ -245,6 +245,7 @@ class ChatMessageSerializer(serializers.Serializer):
form_data = serializers.DictField(required=False, error_messages=ErrMessage.char("全局变量"))
image_list = serializers.ListField(required=False, error_messages=ErrMessage.list("图片"))
document_list = serializers.ListField(required=False, error_messages=ErrMessage.list("文档"))
+ audio_list = serializers.ListField(required=False, error_messages=ErrMessage.list("音频"))
child_node = serializers.DictField(required=False, allow_null=True, error_messages=ErrMessage.dict("子节点"))
def is_valid_application_workflow(self, *, raise_exception=False):
@@ -338,6 +339,7 @@ class ChatMessageSerializer(serializers.Serializer):
form_data = self.data.get('form_data')
image_list = self.data.get('image_list')
document_list = self.data.get('document_list')
+ audio_list = self.data.get('audio_list')
user_id = chat_info.application.user_id
chat_record_id = self.data.get('chat_record_id')
chat_record = None
@@ -354,7 +356,7 @@ class ChatMessageSerializer(serializers.Serializer):
'client_id': client_id,
'client_type': client_type,
'user_id': user_id}, WorkFlowPostHandler(chat_info, client_id, client_type),
- base_to_response, form_data, image_list, document_list,
+ base_to_response, form_data, image_list, document_list, audio_list,
self.data.get('runtime_node_id'),
self.data.get('node_data'), chat_record, self.data.get('child_node'))
r = work_flow_manage.run()
diff --git a/apps/application/views/chat_views.py b/apps/application/views/chat_views.py
index ccd727c8b..54d8cf532 100644
--- a/apps/application/views/chat_views.py
+++ b/apps/application/views/chat_views.py
@@ -134,6 +134,8 @@ class ChatView(APIView):
'image_list') if 'image_list' in request.data else [],
'document_list': request.data.get(
'document_list') if 'document_list' in request.data else [],
+ 'audio_list': request.data.get(
+ 'audio_list') if 'audio_list' in request.data else [],
'client_type': request.auth.client_type,
'node_id': request.data.get('node_id', None),
'runtime_node_id': request.data.get('runtime_node_id', None),
diff --git a/apps/common/util/common.py b/apps/common/util/common.py
index 230727622..73107d62f 100644
--- a/apps/common/util/common.py
+++ b/apps/common/util/common.py
@@ -8,13 +8,15 @@
"""
import hashlib
import importlib
-import mimetypes
import io
+import shutil
+import mimetypes
from functools import reduce
from typing import Dict, List
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.db.models import QuerySet
+from pydub import AudioSegment
from ..exception.app_exception import AppApiException
from ..models.db_model_manage import DBModelManage
@@ -136,3 +138,61 @@ def bytes_to_uploaded_file(file_bytes, file_name="file.txt"):
charset=None,
)
return uploaded_file
+
+def any_to_amr(any_path, amr_path):
+    """
+    Convert an audio file of any supported format to an amr file.
+
+    Args:
+        any_path: path of the source audio file.
+        amr_path: path the amr file is written to.
+
+    Returns:
+        The duration of the converted audio in milliseconds, or ``None`` when the
+        source already is an amr file and is only copied.
+
+    Raises:
+        NotImplementedError: for silk sources (``.sil``, ``.silk``, ``.slk``).
+    """
+    if any_path.endswith(".amr"):
+        shutil.copy2(any_path, amr_path)
+        return
+    if any_path.endswith((".sil", ".silk", ".slk")):
+        raise NotImplementedError("Not support file type: {}".format(any_path))
+    audio = AudioSegment.from_file(any_path)
+    audio = audio.set_frame_rate(8000)  # only support 8000
+    # export() returns the opened output file; close it so the handle is not leaked.
+    audio.export(amr_path, format="amr").close()
+    return audio.duration_seconds * 1000
+
+
+def any_to_mp3(any_path, mp3_path):
+    """
+    Convert an audio file of any supported format to an mp3 file.
+
+    Args:
+        any_path: path of the source audio file; it is never modified.
+        mp3_path: path the mp3 file is written to.
+
+    An mp3 source is copied as is. A silk source (``.sil``, ``.silk``, ``.slk``)
+    is first decoded to a temporary wav file.
+    """
+    if any_path.endswith(".mp3"):
+        shutil.copy2(any_path, mp3_path)
+        return
+    if any_path.endswith((".sil", ".silk", ".slk")):
+        import os
+        import tempfile
+
+        # Decode into a scratch wav file: writing the wav over the source would
+        # destroy the input, and mp3_path holds no audio to load yet.
+        fd, wav_path = tempfile.mkstemp(suffix=".wav")
+        os.close(fd)
+        try:
+            sil_to_wav(any_path, wav_path)
+            audio = AudioSegment.from_file(wav_path, format="wav")
+        finally:
+            os.remove(wav_path)
+    else:
+        audio = AudioSegment.from_file(any_path)
+    # export() returns the opened output file; close it so the handle is not leaked.
+    audio.export(mp3_path, format="mp3").close()
+
+def sil_to_wav(silk_path, wav_path, rate: int = 24000):
+    """
+    Decode a silk audio file and store the result as a wav file.
+
+    Args:
+        silk_path: path of the silk file to decode.
+        wav_path: path the decoded wav data is written to.
+        rate: sample rate handed to the decoder.
+
+    Raises:
+        AppApiException: when the optional ``pysilk`` package cannot be imported.
+    """
+    # pysilk is imported lazily so the module loads without it.
+    try:
+        import pysilk
+    except ImportError:
+        raise AppApiException("import pysilk failed, wechaty voice message will not be supported.")
+    decoded = pysilk.decode_file(silk_path, to_wav=True, sample_rate=rate)
+    with open(wav_path, "wb") as wav_file:
+        wav_file.write(decoded)
+
+
+def split_and_transcribe(file_path, model, max_segment_length_ms=59000, format="mp3"):
+    """
+    Transcribe an audio file, cutting it into consecutive segments when it is too long.
+
+    Args:
+        file_path: path of the audio file to transcribe.
+        model: object exposing ``speech_to_text(file_like)``.
+        max_segment_length_ms: maximum length of one segment in milliseconds.
+        format: audio format used both to load the file and to export segments.
+
+    Returns:
+        The transcription as a string. Segment texts are joined with a single
+        space; results that are not strings are skipped, so the result is ``''``
+        when nothing could be transcribed.
+
+    Raises:
+        ValueError: when ``max_segment_length_ms`` is not positive.
+    """
+    if max_segment_length_ms <= 0:
+        raise ValueError("max_segment_length_ms must be a positive number of milliseconds")
+
+    def transcribe(segment):
+        # export() returns an open file object; read it and close it right away.
+        exported = segment.export(format=format)
+        try:
+            payload = exported.read()
+        finally:
+            exported.close()
+        text = model.speech_to_text(io.BytesIO(payload))
+        return text if isinstance(text, str) else None
+
+    audio_data = AudioSegment.from_file(file_path, format=format)
+    audio_length_ms = len(audio_data)
+
+    if audio_length_ms <= max_segment_length_ms:
+        # Short audio goes to the model in one piece, with the same non-text
+        # filtering that is applied to segments.
+        text = transcribe(audio_data)
+        return text if text is not None else ''
+
+    full_text = []
+    for start_ms in range(0, audio_length_ms, max_segment_length_ms):
+        end_ms = min(audio_length_ms, start_ms + max_segment_length_ms)
+        text = transcribe(audio_data[start_ms:end_ms])
+        if text is not None:
+            full_text.append(text)
+    return ' '.join(full_text)
diff --git a/apps/dataset/serializers/file_serializers.py b/apps/dataset/serializers/file_serializers.py
index 2512f13e6..28a806338 100644
--- a/apps/dataset/serializers/file_serializers.py
+++ b/apps/dataset/serializers/file_serializers.py
@@ -77,5 +77,9 @@ class FileSerializer(serializers.Serializer):
file = QuerySet(File).filter(id=file_id).first()
if file is None:
raise NotFound404(404, "不存在的文件")
+ # 如果是mp3文件,直接返回文件流
+ if file.file_name.split(".")[-1] == 'mp3':
+ return HttpResponse(file.get_byte(), status=200, headers={'Content-Type': 'audio/mp3',
+ 'Content-Disposition': 'attachment; filename="abc.mp3"'})
return HttpResponse(file.get_byte(), status=200,
headers={'Content-Type': mime_types.get(file.file_name.split(".")[-1], 'text/plain')})
diff --git a/apps/dataset/views/file.py b/apps/dataset/views/file.py
index 7ec437d71..395d9418f 100644
--- a/apps/dataset/views/file.py
+++ b/apps/dataset/views/file.py
@@ -36,8 +36,8 @@ class FileView(APIView):
class Operate(APIView):
@action(methods=['GET'], detail=False)
- @swagger_auto_schema(operation_summary="获取图片",
- operation_id="获取图片",
+ @swagger_auto_schema(operation_summary="获取文件",
+ operation_id="获取文件",
tags=["文件"])
def get(self, request: Request, file_id: str):
return FileSerializer.Operate(data={'id': file_id}).get()
diff --git a/pyproject.toml b/pyproject.toml
index 24bbcb0ac..fbbb6c95f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,6 +54,10 @@ django-celery-beat = "^2.6.0"
celery-once = "^3.0.1"
anthropic = "^0.34.2"
pylint = "3.1.0"
+ffmpeg-python = "^0.2.0"
+pydub = "^0.25.1"
+cffi = "^1.17.1"
+pysilk = "^0.0.1"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
diff --git a/ui/src/api/type/application.ts b/ui/src/api/type/application.ts
index 868528023..ee1f07ca9 100644
--- a/ui/src/api/type/application.ts
+++ b/ui/src/api/type/application.ts
@@ -63,6 +63,7 @@ interface chatType {
upload_meta?: {
document_list: Array 语音文件:
语音文件:
+ +文本内容:
+文本内容:
+语音文件:
+ +