feat: Application text to speech and speech to text functions (#3433)

This commit is contained in:
shaohuzhang1 2025-06-30 18:03:11 +08:00 committed by GitHub
parent 2aa86ebfaa
commit d8a9c9ccdd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 506 additions and 79 deletions

View File

@ -12,7 +12,7 @@ from drf_spectacular.utils import OpenApiParameter
from rest_framework import serializers
from application.serializers.application import ApplicationCreateSerializer, ApplicationListResponse, \
ApplicationImportRequest, ApplicationEditSerializer
ApplicationImportRequest, ApplicationEditSerializer, TextToSpeechRequest, SpeechToTextRequest, PlayDemoTextRequest
from common.mixins.api_mixin import APIMixin
from common.result import ResultSerializer, ResultPageSerializer, DefaultResultSerializer
@ -167,3 +167,45 @@ class ApplicationEditAPI(APIMixin):
@staticmethod
def get_request():
return ApplicationEditSerializer
class TextToSpeechAPI(APIMixin):
    """Schema helpers (parameters / request / response) for the text-to-speech endpoint."""

    @staticmethod
    def get_parameters():
        """Reuse the parameters declared by ``ApplicationOperateAPI``."""
        return ApplicationOperateAPI.get_parameters()

    @staticmethod
    def get_request():
        """Return the serializer class that describes the request body."""
        return TextToSpeechRequest

    @staticmethod
    def get_response():
        """Return the serializer class that describes the response."""
        return DefaultResultSerializer
class SpeechToTextAPI(APIMixin):
    """Schema helpers (parameters / request / response) for the speech-to-text endpoint."""

    @staticmethod
    def get_parameters():
        """Reuse the parameters declared by ``ApplicationOperateAPI``."""
        return ApplicationOperateAPI.get_parameters()

    @staticmethod
    def get_request():
        """Return the serializer class that describes the request body."""
        return SpeechToTextRequest

    @staticmethod
    def get_response():
        """Return the serializer class that describes the response."""
        return DefaultResultSerializer
class PlayDemoTextAPI(APIMixin):
    """Schema helpers (parameters / request / response) for the demo-playback endpoint."""

    @staticmethod
    def get_parameters():
        """Reuse the parameters declared by ``ApplicationOperateAPI``."""
        return ApplicationOperateAPI.get_parameters()

    @staticmethod
    def get_request():
        """Return the serializer class that describes the request body."""
        return PlayDemoTextRequest

    @staticmethod
    def get_response():
        """Return the serializer class that describes the response."""
        return DefaultResultSerializer

View File

@ -103,7 +103,7 @@ class BaseSearchDatasetStep(ISearchDatasetStep):
paragraph_list = native_search(QuerySet(Paragraph).filter(id__in=paragraph_id_list),
get_file_content(
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
'list_dataset_paragraph_by_paragraph_id.sql')),
'list_knowledge_paragraph_by_paragraph_id.sql')),
with_table_name=True)
# 如果向量库中存在脏数据 直接删除
if len(paragraph_list) != len(paragraph_id_list):

View File

@ -6,8 +6,10 @@
@date2025/5/26 17:03
@desc:
"""
import asyncio
import datetime
import hashlib
import json
import os
import pickle
import re
@ -19,6 +21,7 @@ from django.db import models, transaction
from django.db.models import QuerySet, Q
from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _
from langchain_mcp_adapters.client import MultiServerMCPClient
from rest_framework import serializers, status
from rest_framework.utils.formatting import lazy_format
@ -36,6 +39,7 @@ from knowledge.models import Knowledge, KnowledgeScope
from knowledge.serializers.knowledge import KnowledgeSerializer, KnowledgeModelSerializer
from maxkb.conf import PROJECT_DIR
from models_provider.models import Model
from models_provider.tools import get_model_instance_by_model_workspace_id
from system_manage.models import WorkspaceUserResourcePermission
from tools.models import Tool, ToolScope
from tools.serializers.tool import ToolModelSerializer
@ -384,9 +388,9 @@ class ApplicationEditSerializer(serializers.Serializer):
label=_("Historical chat records"))
prologue = serializers.CharField(required=False, allow_null=True, allow_blank=True, max_length=102400,
label=_("Opening remarks"))
dataset_id_list = serializers.ListSerializer(required=False, child=serializers.UUIDField(required=True),
label=_("Related Knowledge Base")
)
knowledge_id_list = serializers.ListSerializer(required=False, child=serializers.UUIDField(required=True),
label=_("Related Knowledge Base")
)
# 数据集相关设置
knowledge_setting = KnowledgeSettingSerializer(required=False, allow_null=True,
label=_("Dataset settings"))
@ -441,8 +445,8 @@ class ApplicationSerializer(serializers.Serializer):
return ApplicationCreateSerializer.ApplicationResponse(application_model).data
@staticmethod
def to_application_knowledge_mapping(application_id: str, dataset_id: str):
return ApplicationKnowledgeMapping(id=uuid.uuid7(), application_id=application_id, dataset_id=dataset_id)
def to_application_knowledge_mapping(application_id: str, knowledge_id: str):
return ApplicationKnowledgeMapping(id=uuid.uuid7(), application_id=application_id, knowledge_id=knowledge_id)
def insert_simple(self, instance: Dict):
self.is_valid(raise_exception=True)
@ -451,10 +455,10 @@ class ApplicationSerializer(serializers.Serializer):
ApplicationCreateSerializer.SimplateRequest(data=instance).is_valid(user_id=user_id, raise_exception=True)
application_model = ApplicationCreateSerializer.SimplateRequest.to_application_model(user_id, workspace_id,
instance)
dataset_id_list = instance.get('knowledge_id_list', [])
knowledge_id_list = instance.get('knowledge_id_list', [])
application_knowledge_mapping_model_list = [
self.to_application_knowledge_mapping(application_model.id, dataset_id) for
dataset_id in dataset_id_list]
self.to_application_knowledge_mapping(application_model.id, knowledge_id) for
knowledge_id in knowledge_id_list]
# 插入应用
application_model.save()
# 插入认证信息
@ -519,15 +523,15 @@ class ApplicationSerializer(serializers.Serializer):
def to_application(application, workspace_id, user_id):
work_flow = application.get('work_flow')
for node in work_flow.get('nodes', []):
if node.get('type') == 'search-dataset-node':
node.get('properties', {}).get('node_data', {})['dataset_id_list'] = []
if node.get('type') == 'search-knowledge-node':
node.get('properties', {}).get('node_data', {})['knowledge_id_list'] = []
return Application(id=uuid.uuid7(),
user_id=user_id,
name=application.get('name'),
workspace_id=workspace_id,
desc=application.get('desc'),
prologue=application.get('prologue'), dialogue_number=application.get('dialogue_number'),
dataset_setting=application.get('dataset_setting'),
knowledge_setting=application.get('knowledge_setting'),
model_setting=application.get('model_setting'),
model_params_setting=application.get('model_params_setting'),
tts_model_params_setting=application.get('tts_model_params_setting'),
@ -545,6 +549,27 @@ class ApplicationSerializer(serializers.Serializer):
)
class TextToSpeechRequest(serializers.Serializer):
    """Request body of the text-to-speech endpoint."""
    # Text to synthesize; mandatory.
    text = serializers.CharField(label=_('Text'), required=True)
class SpeechToTextRequest(serializers.Serializer):
    """Request body of the speech-to-text endpoint."""
    # Uploaded audio file; mandatory.
    file = UploadedFileField(label=_("file"), required=True)
class PlayDemoTextRequest(serializers.Serializer):
    """Request body of the demo-playback endpoint."""
    # Identifier of the text-to-speech model to try out; mandatory.
    tts_model_id = serializers.UUIDField(label=_('Text to speech model ID'), required=True)
async def get_mcp_tools(servers):
    """
    Open an MCP client for ``servers`` and return the tools it reports.

    NOTE(review): relies on ``MultiServerMCPClient`` being an async context manager whose
    ``get_tools()`` is synchronous - confirm against the pinned langchain_mcp_adapters version.

    @param servers: server configuration passed unchanged to ``MultiServerMCPClient``
    @return: the value of ``client.get_tools()``
    """
    async with MultiServerMCPClient(servers) as mcp_client:
        return mcp_client.get_tools()
class McpServersSerializer(serializers.Serializer):
    """Validates the payload that carries the MCP server configuration."""
    # JSON description of the MCP servers; mandatory.
    mcp_servers = serializers.JSONField(required=True)
class ApplicationOperateSerializer(serializers.Serializer):
application_id = serializers.UUIDField(required=True, label=_("Application ID"))
user_id = serializers.UUIDField(required=True, label=_("User ID"))
@ -559,6 +584,23 @@ class ApplicationOperateSerializer(serializers.Serializer):
if not query_set.exists():
raise AppApiException(500, _('Application id does not exist'))
def get_mcp_servers(self, instance, with_valid=True):
    """
    List the tools exposed by every MCP server named in ``instance['mcp_servers']``.

    @param instance:   mapping with an ``mcp_servers`` entry, given either as JSON text or as an
                       already-decoded object mapping server name -> server configuration
    @param with_valid: when True, validate this serializer and ``instance`` first
    @return: list of dicts with the keys ``server``, ``name``, ``description`` and ``args_schema``
    @raise AppApiException: ``mcp_servers`` is not valid JSON or does not decode to an object
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        McpServersSerializer(data=instance).is_valid(raise_exception=True)
    servers = instance.get('mcp_servers')
    # JSONField accepts both JSON text and already-decoded objects, so only decode text.
    if isinstance(servers, (str, bytes, bytearray)):
        try:
            servers = json.loads(servers)
        except ValueError as e:  # JSONDecodeError and UnicodeDecodeError are both ValueErrors
            raise AppApiException(500, _('mcp_servers is not valid JSON')) from e
    if not isinstance(servers, dict):
        raise AppApiException(500, _('mcp_servers must be a JSON object'))
    tools = []
    for server_name, server_config in servers.items():
        # Each server is queried on its own so every tool can be tagged with its origin.
        tools.extend(
            {
                'server': server_name,
                'name': tool.name,
                'description': tool.description,
                'args_schema': tool.args_schema,
            }
            for tool in asyncio.run(get_mcp_tools({server_name: server_config})))
    return tools
def delete(self, with_valid=True):
if with_valid:
self.is_valid()
@ -691,7 +733,7 @@ class ApplicationOperateSerializer(serializers.Serializer):
if application.type == ApplicationTypeChoices.SIMPLE.value:
application.is_publish = True
update_keys = ['name', 'desc', 'model_id', 'multiple_rounds_dialogue', 'prologue', 'status',
'dataset_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
'knowledge_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type',
'tts_autoplay', 'stt_autosend', 'file_upload_enable', 'file_upload_setting',
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting',
@ -746,7 +788,7 @@ class ApplicationOperateSerializer(serializers.Serializer):
"""
修改知识库检索节点 数据
定义 all_knowledge_id_list: 所有的关联知识库
dataset_id_list: 当前用户可看到的关联知识库列表
knowledge_id_list: 当前用户可看到的关联知识库列表
knowledge_list: 用户
@param workflow: 知识库
@param available_knowledge_dict: 当前用户可用的知识库
@ -802,3 +844,35 @@ class ApplicationOperateSerializer(serializers.Serializer):
QuerySet(ApplicationKnowledgeMapping).bulk_create(
[ApplicationKnowledgeMapping(application_id=application_id, knowledge_id=knowledge_id) for knowledge_id in
knowledge_id_list]) if len(knowledge_id_list) > 0 else None
def speech_to_text(self, instance, with_valid=True):
    """
    Transcribe the uploaded audio with the application's speech-to-text model.

    @param instance:   mapping whose ``file`` entry is the uploaded audio
    @param with_valid: when True, validate this serializer and ``instance`` first
    @return: the result of the model's ``speech_to_text``; ``None`` when speech-to-text is
             disabled for the application
    @raise AppApiException: the application no longer exists
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        SpeechToTextRequest(data=instance).is_valid(raise_exception=True)
    application_id = self.data.get('application_id')
    application = QuerySet(Application).filter(id=application_id).first()
    if application is None:
        # .first() returns None rather than raising; fail with a clear error instead of AttributeError
        raise AppApiException(500, _('Application id does not exist'))
    if not application.stt_model_enable:
        return None
    model = get_model_instance_by_model_workspace_id(application.stt_model_id, application.workspace_id)
    return model.speech_to_text(instance.get('file'))
def text_to_speech(self, instance, with_valid=True):
    """
    Synthesize ``instance['text']`` with the application's text-to-speech model.

    @param instance:   mapping whose ``text`` entry is the text to speak
    @param with_valid: when True, validate this serializer and ``instance`` first
    @return: the result of the model's ``text_to_speech``; ``None`` when text-to-speech is
             disabled for the application
    @raise AppApiException: the application no longer exists
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        TextToSpeechRequest(data=instance).is_valid(raise_exception=True)
    application_id = self.data.get('application_id')
    application = QuerySet(Application).filter(id=application_id).first()
    if application is None:
        # .first() returns None rather than raising; fail with a clear error instead of AttributeError
        raise AppApiException(500, _('Application id does not exist'))
    if not application.tts_model_enable:
        return None
    # The stored setting may be None; ``**None`` would raise TypeError.
    model_params = application.tts_model_params_setting or {}
    model = get_model_instance_by_model_workspace_id(application.tts_model_id, application.workspace_id,
                                                     **model_params)
    return model.text_to_speech(instance.get('text'))
def play_demo_text(self, instance, with_valid=True):
    """
    Synthesize a fixed demo sentence with the requested text-to-speech model.

    @param instance:   mapping with ``tts_model_id``; every other entry is forwarded to the
                       model factory as a keyword argument. The mapping is not modified.
    @param with_valid: when True, validate this serializer and ``instance`` first
    @return: the result of the model's ``text_to_speech`` for the demo sentence
    @raise KeyError: ``tts_model_id`` is missing (only reachable when validation is skipped)
    """
    text = '你好,这里是语音播放测试'
    if with_valid:
        self.is_valid(raise_exception=True)
        PlayDemoTextRequest(data=instance).is_valid(raise_exception=True)
    tts_model_id = instance['tts_model_id']
    # Build a separate dict instead of popping: the caller passes request.data, which must not
    # be mutated and may be an immutable QueryDict.
    model_params = {key: value for key, value in instance.items() if key != 'tts_model_id'}
    model = get_model_instance_by_model_workspace_id(tts_model_id, self.data.get('workspace_id'), **model_params)
    return model.text_to_speech(text)

View File

@ -30,6 +30,10 @@ urlpatterns = [
path('workspace/<str:workspace_id>/application/<str:application_id>/work_flow_version/<int:current_page>/<int:page_size>', views.ApplicationVersionView.Page.as_view()),
path('workspace/<str:workspace_id>/application/<str:application_id>/work_flow_version/<str:work_flow_version_id>', views.ApplicationVersionView.Operate.as_view()),
path('workspace/<str:workspace_id>/application/<str:application_id>/open', views.OpenView.as_view()),
path('workspace/<str:workspace_id>/application/<str:application_id>/text_to_speech', views.TextToSpeech.as_view()),
path('workspace/<str:workspace_id>/application/<str:application_id>/speech_to_text', views.SpeechToText.as_view()),
path('workspace/<str:workspace_id>/application/<str:application_id>/play_demo_text', views.PlayDemoText.as_view()),
path('workspace/<str:workspace_id>/application/<str:application_id>/mcp_tools', views.McpServers.as_view()),
path('chat_message/<str:chat_id>', views.ChatView.as_view()),
]

View File

@ -7,6 +7,7 @@
@desc:
"""
from django.db.models import QuerySet
from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _
from drf_spectacular.utils import extend_schema
from rest_framework.parsers import MultiPartParser
@ -14,13 +15,14 @@ from rest_framework.request import Request
from rest_framework.views import APIView
from application.api.application_api import ApplicationCreateAPI, ApplicationQueryAPI, ApplicationImportAPI, \
ApplicationExportAPI, ApplicationOperateAPI, ApplicationEditAPI
ApplicationExportAPI, ApplicationOperateAPI, ApplicationEditAPI, TextToSpeechAPI, SpeechToTextAPI, PlayDemoTextAPI
from application.models import Application
from application.serializers.application import ApplicationSerializer, Query, ApplicationOperateSerializer
from application.serializers.application import ApplicationSerializer, Query, ApplicationOperateSerializer, \
McpServersSerializer
from common import result
from common.auth import TokenAuth
from common.auth.authentication import has_permissions
from common.constants.permission_constants import PermissionConstants, RoleConstants
from common.constants.permission_constants import PermissionConstants, RoleConstants, CompareConstants
from common.log.log import log
@ -233,3 +235,101 @@ class ApplicationAPI(APIView):
ApplicationOperateSerializer(
data={'application_id': application_id, 'user_id': request.user.id,
'workspace_id': workspace_id, }).publish(request.data))
class McpServers(APIView):
    """Lists the tools exposed by the MCP servers given in the ``mcp_servers`` query parameter."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['GET'],
        description=_("Get MCP tools"),
        summary=_("Get MCP tools"),
        operation_id=_("Get MCP tools"),  # type: ignore
        parameters=ApplicationOperateAPI.get_parameters(),
        # No request body: this is a GET. The response reuses the generic result serializer.
        responses=SpeechToTextAPI.get_response(),
        tags=[_('Application')]  # type: ignore
    )
    @has_permissions(PermissionConstants.APPLICATION_READ.get_workspace_application_permission(),
                     PermissionConstants.APPLICATION_READ.get_workspace_permission_workspace_manage_role(),
                     RoleConstants.USER.get_workspace_role(),
                     RoleConstants.WORKSPACE_MANAGE.get_workspace_role())
    def get(self, request: Request, workspace_id, application_id: str):
        # The serializer validates application_id / user_id, so they go into ``data``;
        # the MCP configuration is the ``instance`` argument get_mcp_servers requires.
        return result.success(ApplicationOperateSerializer(
            data={'application_id': application_id, 'workspace_id': workspace_id,
                  'user_id': request.user.id}).get_mcp_servers(
            {'mcp_servers': request.query_params.get('mcp_servers')}))
class SpeechToText(APIView):
    """Workspace endpoint that hands an uploaded file to ``ApplicationOperateSerializer.speech_to_text``."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['POST'],
        description=_("speech to text"),
        summary=_("speech to text"),
        operation_id=_("speech to text"),  # type: ignore
        parameters=SpeechToTextAPI.get_parameters(),
        request=SpeechToTextAPI.get_request(),
        responses=SpeechToTextAPI.get_response(),
        tags=[_('Application')]  # type: ignore
    )
    @has_permissions(PermissionConstants.APPLICATION_EDIT.get_workspace_application_permission(),
                     PermissionConstants.APPLICATION_EDIT.get_workspace_permission_workspace_manage_role(),
                     RoleConstants.USER.get_workspace_role(),
                     RoleConstants.WORKSPACE_MANAGE.get_workspace_role())
    def post(self, request: Request, workspace_id: str, application_id: str):
        operate_serializer = ApplicationOperateSerializer(
            data={'application_id': application_id, 'workspace_id': workspace_id, 'user_id': request.user.id})
        # Only the uploaded "file" part is passed on.
        upload = {'file': request.FILES.get('file')}
        return result.success(operate_serializer.speech_to_text(upload))
class TextToSpeech(APIView):
    """Workspace endpoint that returns the output of ``ApplicationOperateSerializer.text_to_speech`` as MP3."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['POST'],
        description=_("text to speech"),
        summary=_("text to speech"),
        operation_id=_("text to speech"),  # type: ignore
        parameters=TextToSpeechAPI.get_parameters(),
        request=TextToSpeechAPI.get_request(),
        responses=TextToSpeechAPI.get_response(),
        tags=[_('Application')]  # type: ignore
    )
    @has_permissions(PermissionConstants.APPLICATION_EDIT.get_workspace_application_permission(),
                     PermissionConstants.APPLICATION_EDIT.get_workspace_permission_workspace_manage_role(),
                     RoleConstants.USER.get_workspace_role(),
                     RoleConstants.WORKSPACE_MANAGE.get_workspace_role())
    def post(self, request: Request, workspace_id: str, application_id: str):
        operate_serializer = ApplicationOperateSerializer(
            data={'application_id': application_id, 'workspace_id': workspace_id,
                  'user_id': request.user.id})
        audio = operate_serializer.text_to_speech(request.data)
        # Sent as a downloadable attachment with an audio content type.
        response_headers = {'Content-Type': 'audio/mp3',
                            'Content-Disposition': 'attachment; filename="abc.mp3"'}
        return HttpResponse(audio, status=200, headers=response_headers)
class PlayDemoText(APIView):
    """Workspace endpoint that returns the output of ``ApplicationOperateSerializer.play_demo_text`` as MP3."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['POST'],
        description=_("PlayDemo"),
        summary=_("PlayDemo"),
        operation_id=_("PlayDemo"),  # type: ignore
        parameters=PlayDemoTextAPI.get_parameters(),
        request=PlayDemoTextAPI.get_request(),
        responses=PlayDemoTextAPI.get_response(),
        tags=[_('Application')]  # type: ignore
    )
    @has_permissions(PermissionConstants.APPLICATION_EDIT.get_workspace_application_permission(),
                     PermissionConstants.APPLICATION_EDIT.get_workspace_permission_workspace_manage_role(),
                     RoleConstants.USER.get_workspace_role(),
                     RoleConstants.WORKSPACE_MANAGE.get_workspace_role())
    @log(menu='Application', operate="trial listening",
         get_operation_object=lambda r, k: get_application_operation_object(k.get('application_id')))
    def post(self, request: Request, workspace_id: str, application_id: str):
        operate_serializer = ApplicationOperateSerializer(
            data={'application_id': application_id, 'workspace_id': workspace_id,
                  'user_id': request.user.id})
        audio = operate_serializer.play_demo_text(request.data)
        # Sent as a downloadable attachment with an audio content type.
        response_headers = {'Content-Type': 'audio/mp3',
                            'Content-Disposition': 'attachment; filename="abc.mp3"'}
        return HttpResponse(audio, status=200, headers=response_headers)

View File

@ -27,6 +27,7 @@ from application.flow.i_step_node import WorkFlowPostHandler
from application.flow.workflow_manage import WorkflowManage
from application.models import Application, ApplicationTypeChoices, WorkFlowVersion, ApplicationKnowledgeMapping, \
ChatUserType, ApplicationChatUserStats, ApplicationAccessToken, ChatRecord, Chat
from application.serializers.application import ApplicationOperateSerializer
from application.serializers.common import ChatInfo
from common.exception.app_exception import AppApiException, AppChatNumOutOfBoundsFailed, ChatException
from common.handle.base_to_response import BaseToResponse
@ -282,7 +283,7 @@ class ChatSerializers(serializers.Serializer):
def re_open_chat_simple(self, chat_id, application):
# 数据集id列表
knowledge_id_list = [str(row.dataset_id) for row in
knowledge_id_list = [str(row.knowledge_id) for row in
QuerySet(ApplicationKnowledgeMapping).filter(
application_id=application.id)]
@ -292,7 +293,7 @@ class ChatSerializers(serializers.Serializer):
knowledge_id__in=knowledge_id_list,
is_active=False)]
chat_info = ChatInfo(chat_id, self.data.get('chat_user_id'), self.data.get('chat_user_type'), knowledge_id_list,
exclude_document_id_list, application)
exclude_document_id_list, application.id, application)
chat_record_list = list(QuerySet(ChatRecord).filter(chat_id=chat_id).order_by('-create_time')[0:5])
chat_record_list.sort(key=lambda r: r.create_time)
for chat_record in chat_record_list:
@ -378,3 +379,27 @@ class OpenChatSerializers(serializers.Serializer):
application_id,
application, debug=debug).set_cache()
return chat_id
class TextToSpeechSerializers(serializers.Serializer):
    """Chat-side entry point that runs text-to-speech on behalf of the application's owner."""
    application_id = serializers.UUIDField(required=True, label=_("Application ID"))

    def text_to_speech(self, instance):
        """
        Delegate to ``ApplicationOperateSerializer.text_to_speech`` using the application's ``user_id``.

        @param instance: request payload forwarded unchanged
        @return: whatever ``ApplicationOperateSerializer.text_to_speech`` returns
        @raise AppApiException: no application exists for ``application_id``
        """
        self.is_valid(raise_exception=True)
        application_id = self.data.get('application_id')
        application = QuerySet(Application).filter(id=application_id).first()
        if application is None:
            # .first() returns None for an unknown id; avoid AttributeError on application.user_id
            raise AppApiException(500, _('Application id does not exist'))
        return ApplicationOperateSerializer(
            data={'application_id': application_id,
                  'user_id': application.user_id}).text_to_speech(instance)
class SpeechToTextSerializers(serializers.Serializer):
    """Chat-side entry point that runs speech-to-text on behalf of the application's owner."""
    application_id = serializers.UUIDField(required=True, label=_("Application ID"))

    def speech_to_text(self, instance):
        """
        Delegate to ``ApplicationOperateSerializer.speech_to_text`` using the application's ``user_id``.

        @param instance: request payload forwarded unchanged
        @return: whatever ``ApplicationOperateSerializer.speech_to_text`` returns
        @raise AppApiException: no application exists for ``application_id``
        """
        self.is_valid(raise_exception=True)
        application_id = self.data.get('application_id')
        application = QuerySet(Application).filter(id=application_id).first()
        if application is None:
            # .first() returns None for an unknown id; avoid AttributeError on application.user_id
            raise AppApiException(500, _('Application id does not exist'))
        return ApplicationOperateSerializer(
            data={'application_id': application_id,
                  'user_id': application.user_id}).speech_to_text(instance)

View File

@ -11,6 +11,8 @@ urlpatterns = [
path('application/profile', views.ApplicationProfile.as_view()),
path('chat_message/<str:chat_id>', views.ChatView.as_view()),
path('open', views.OpenView.as_view()),
path('text_to_speech', views.TextToSpeech.as_view()),
path('speech_to_text', views.SpeechToText.as_view()),
path('captcha', views.CaptchaView.as_view(), name='captcha'),
path('vote/chat/<str:chat_id>/chat_record/<str:chat_record_id>', views.VoteView.as_view(), name='vote'),
path('historical_conversation', views.HistoricalConversationView.as_view(), name='historical_conversation'),

View File

@ -12,12 +12,15 @@ from drf_spectacular.utils import extend_schema
from rest_framework.request import Request
from rest_framework.views import APIView
from application.api.application_api import SpeechToTextAPI, TextToSpeechAPI
from application.serializers.application import ApplicationOperateSerializer
from chat.api.chat_api import ChatAPI
from chat.api.chat_authentication_api import ChatAuthenticationAPI, ChatAuthenticationProfileAPI, ChatOpenAPI
from chat.serializers.chat import OpenChatSerializers, ChatSerializers
from chat.serializers.chat import OpenChatSerializers, ChatSerializers, SpeechToTextSerializers, TextToSpeechSerializers
from chat.serializers.chat_authentication import AnonymousAuthenticationSerializer, ApplicationProfileSerializer, \
AuthProfileSerializer
from common.auth import TokenAuth
from common.auth.authentication import has_permissions
from common.constants.permission_constants import ChatAuth
from common.exception.app_exception import AppAuthenticationFailed
from common.result import result
@ -135,3 +138,41 @@ class CaptchaView(APIView):
responses=CaptchaAPI.get_response())
def get(self, request: Request):
return result.success(CaptchaSerializer().generate())
class SpeechToText(APIView):
    """Chat endpoint: transcribes an uploaded file for the application bound to the auth token."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['POST'],
        description=_("speech to text"),
        summary=_("speech to text"),
        operation_id=_("speech to text"),  # type: ignore
        request=SpeechToTextAPI.get_request(),
        responses=SpeechToTextAPI.get_response(),
        tags=[_('Application')]  # type: ignore
    )
    def post(self, request: Request):
        # The application id comes from the authentication object, not from the URL.
        stt_serializer = SpeechToTextSerializers(data={'application_id': request.auth.application_id})
        upload = {'file': request.FILES.get('file')}
        return result.success(stt_serializer.speech_to_text(upload))
class TextToSpeech(APIView):
    """Chat endpoint: synthesizes speech for the application bound to the auth token."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['POST'],
        description=_("text to speech"),
        summary=_("text to speech"),
        operation_id=_("text to speech"),  # type: ignore
        request=TextToSpeechAPI.get_request(),
        responses=TextToSpeechAPI.get_response(),
        tags=[_('Application')]  # type: ignore
    )
    def post(self, request: Request):
        # The application id comes from the authentication object, not from the URL.
        tts_serializer = TextToSpeechSerializers(data={'application_id': request.auth.application_id})
        audio = tts_serializer.text_to_speech(request.data)
        response_headers = {'Content-Type': 'audio/mp3',
                            'Content-Disposition': 'attachment; filename="abc.mp3"'}
        return HttpResponse(audio, status=200, headers=response_headers)

BIN
ui/public/tipIMG.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 220 KiB

View File

@ -1,14 +1,14 @@
import {Result} from '@/request/Result'
import {get, post, postStream, del, put, request, download, exportFile} from '@/request/index'
import type {pageRequest} from '@/api/type/common'
import type {ApplicationFormType} from '@/api/type/application'
import {type Ref} from 'vue'
import { Result } from '@/request/Result'
import { get, post, postStream, del, put, request, download, exportFile } from '@/request/index'
import type { pageRequest } from '@/api/type/common'
import type { ApplicationFormType } from '@/api/type/application'
import { type Ref } from 'vue'
import useStore from '@/stores'
const prefix: any = {_value: '/workspace/'}
const prefix: any = { _value: '/workspace/' }
Object.defineProperty(prefix, 'value', {
get: function () {
const {user} = useStore()
const { user } = useStore()
return this._value + user.getWorkspaceId() + '/application'
},
})
@ -219,7 +219,7 @@ const updatePlatformConfig: (
application_id: string,
type: string,
data: any,
loading?: Ref<boolean>
loading?: Ref<boolean>,
) => Promise<Result<any>> = (application_id, type, data, loading) => {
return post(`${prefix.value}/${application_id}/platform/${type}`, data, undefined, loading)
}
@ -236,6 +236,55 @@ const publish: (
) => Promise<Result<any>> = (application_id, data, loading) => {
return put(`${prefix.value}/${application_id}/publish`, data, {}, loading)
}
/**
 * POSTs to the application's `play_demo_text` endpoint through `download`.
 * @param application_id application whose endpoint is called
 * @param data request body, forwarded unchanged
 * @param loading optional loading flag, forwarded to `download`
 * @returns whatever `download` resolves with
 */
const playDemoText: (application_id: string, data: any, loading?: Ref<boolean>) => Promise<any> = (
  application_id,
  data,
  loading,
) => {
  const url = `${prefix.value}/${application_id}/play_demo_text`
  return download(url, 'post', data, undefined, loading)
}
/**
 * POSTs to the application's `text_to_speech` endpoint through `download`.
 * @param application_id application whose endpoint is called
 * @param data request body, forwarded unchanged
 * @param loading optional loading flag, forwarded to `download`
 * @returns whatever `download` resolves with
 */
const textToSpeech: (
  // primitive `string`, not the boxed `String` object type
  application_id: string,
  data: any,
  loading?: Ref<boolean>,
) => Promise<Result<any>> = (application_id, data, loading) => {
  return download(
    `${prefix.value}/${application_id}/text_to_speech`,
    'post',
    data,
    undefined,
    loading,
  )
}
/**
 * POSTs to the application's `speech_to_text` endpoint.
 * @param application_id application whose endpoint is called
 * @param data request body, forwarded unchanged
 * @param loading optional loading flag, forwarded to `post`
 * @returns whatever `post` resolves with
 */
const speechToText: (
  // primitive `string`, not the boxed `String` object type
  application_id: string,
  data: any,
  loading?: Ref<boolean>,
) => Promise<Result<any>> = (application_id, data, loading) => {
  return post(`${prefix.value}/${application_id}/speech_to_text`, data, undefined, loading)
}
export default {
getAllApplication,
getApplication,
@ -256,5 +305,8 @@ export default {
updatePlatformStatus,
getPlatformConfig,
publish,
updatePlatformConfig
updatePlatformConfig,
playDemoText,
textToSpeech,
speechToText,
}

View File

@ -244,7 +244,25 @@ const getChatRecord: (
) => Promise<Result<any>> = (chat_id, chat_record_id, loading) => {
return get(`historical_conversation/${chat_id}/record/${chat_record_id}`, {}, loading)
}
/**
 * POSTs to the chat-side `text_to_speech` endpoint through `download`.
 * @param data request body, forwarded unchanged
 * @param loading optional loading flag, forwarded to `download`
 */
const textToSpeech: (data: any, loading?: Ref<boolean>) => Promise<Result<any>> = (
  data,
  loading,
) => download(`text_to_speech`, 'post', data, undefined, loading)
/**
 * POSTs to the chat-side `speech_to_text` endpoint.
 * @param data request body, forwarded unchanged
 * @param loading optional loading flag, forwarded to `post`
 */
const speechToText: (data: any, loading?: Ref<boolean>) => Promise<Result<any>> = (
  data,
  loading,
) => post(`speech_to_text`, data, undefined, loading)
export default {
open,
chat,
@ -269,4 +287,6 @@ export default {
resetCurrentPassword,
getChatUserProfile,
getChatRecord,
textToSpeech,
speechToText,
}

View File

@ -301,7 +301,7 @@
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted, nextTick, watch } from 'vue'
import { ref, computed, onMounted, nextTick, watch, type Ref } from 'vue'
import Recorder from 'recorder-core'
import TouchChat from './TouchChat.vue'
import applicationApi from '@/api/application/application'
@ -314,6 +314,7 @@ import 'recorder-core/src/engine/mp3'
import 'recorder-core/src/engine/mp3-engine'
import { MsgWarning } from '@/utils/message'
import { t } from '@/locales'
import chatAPI from '@/api/chat/chat'
const router = useRouter()
const route = useRoute()
const {
@ -687,7 +688,7 @@ class RecorderManage {
`${err}
<div style="width: 100%;height:1px;border-top:1px var(--el-border-color) var(--el-border-style);margin:10px 0;"></div>
${t('chat.tip.recorderTip')}
<img src="${new URL(`@/assets/tipIMG.jpg`, import.meta.url).href}" style="width: 100%;" />`,
<img src="${new URL(`/tipIMG.jpg`, import.meta.url).href}" style="width: 100%;" />`,
{
confirmButtonText: t('chat.tip.confirm'),
dangerouslyUseHTMLString: true,
@ -697,6 +698,16 @@ class RecorderManage {
}
}
}
/**
 * Picks the speech-to-text request function for the current component type.
 * Both variants share the (application_id, data, loading) call shape so the caller
 * does not need to know which one it got.
 */
const getSpeechToTextAPI = () => {
  if (props.type === 'ai-chat') {
    // The chat API takes no application id, so the first argument is ignored.
    return (application_id?: string, data?: any, loading?: Ref<boolean>) => {
      return chatAPI.speechToText(data, loading)
    }
  } else {
    // Must be the speech-to-text call: textToSpeech would send the recording to the wrong endpoint.
    return applicationApi.speechToText
  }
}
const speechToTextAPI = getSpeechToTextAPI()
//
const uploadRecording = async (audioBlob: Blob) => {
try {
@ -710,8 +721,7 @@ const uploadRecording = async (audioBlob: Blob) => {
if (props.applicationDetails.stt_autosend) {
bus.emit('on:transcribing', true)
}
applicationApi
.postSpeechToText(props.applicationDetails.id as string, formData, localLoading)
speechToTextAPI(props.applicationDetails.id as string, formData, localLoading)
.then((response) => {
inputValue.value = typeof response.data === 'string' ? response.data : ''
//

View File

@ -99,7 +99,7 @@
</div>
</template>
<script setup lang="ts">
import { nextTick, onMounted, ref, onBeforeUnmount } from 'vue'
import { nextTick, onMounted, ref, onBeforeUnmount, type Ref } from 'vue'
import { useRoute } from 'vue-router'
import { copyClick } from '@/utils/clipboard'
import applicationApi from '@/api/application/application'
@ -262,6 +262,16 @@ enum AudioStatus {
*/
ERROR = 'ERROR',
}
/**
 * Picks the text-to-speech request function for the current component type.
 * Both variants share the (application_id, data, loading) call shape.
 */
const getTextToSpeechAPI = () => {
  if (props.type !== 'ai-chat') {
    return applicationApi.textToSpeech
  }
  // The chat API takes no application id, so the first argument is ignored.
  return (application_id?: string, data?: any, loading?: Ref<boolean>) =>
    chatAPI.textToSpeech(data, loading)
}
const textToSpeechAPI = getTextToSpeechAPI()
class AudioManage {
textList: Array<string>
statusList: Array<AudioStatus>
@ -313,12 +323,11 @@ class AudioManage {
audioElement.src = text.match(/src="([^"]*)"/)?.[1] || ''
this.statusList[index] = AudioStatus.READY
} else {
applicationApi
.postTextToSpeech(
(props.applicationId as string) || (id as string),
{ text: text },
loading,
)
textToSpeechAPI(
(props.applicationId as string) || (id as string),
{ text: text },
loading,
)
.then(async (res: any) => {
if (res.type === 'application/json') {
const text = await res.text()
@ -376,12 +385,11 @@ class AudioManage {
if (audioElement instanceof HTMLAudioElement) {
const text = this.textList[index]
this.statusList[index] = AudioStatus.MOUNTED
applicationApi
.postTextToSpeech(
(props.applicationId as string) || (id as string),
{ text: text },
loading,
)
textToSpeechAPI(
(props.applicationId as string) || (id as string),
{ text: text },
loading,
)
.then(async (res: any) => {
if (res.type === 'application/json') {
const text = await res.text()

View File

@ -500,4 +500,54 @@ export default {
])
},
},
'app-video-play': {
iconReader: () => {
return h('i', [
h(
'svg',
{
style: { height: '100%', width: '100%' },
viewBox: '0 0 1024 1024',
version: '1.1',
xmlns: 'http://www.w3.org/2000/svg',
},
[
h('path', {
d: 'M512 896a384 384 0 1 0 0-768 384 384 0 0 0 0 768z m469.333333-384c0 259.2-210.133333 469.333333-469.333333 469.333333S42.666667 771.2 42.666667 512 252.8 42.666667 512 42.666667s469.333333 210.133333 469.333333 469.333333z',
fill: 'currentColor',
}),
h('path', {
d: 'M686.890667 539.776l-253.141334 159.274667a32.298667 32.298667 0 0 1-44.8-10.453334 32.896 32.896 0 0 1-4.949333-17.322666V352.768a32.64 32.64 0 0 1 32.512-32.768c6.101333 0 12.074667 1.706667 17.28 4.992l253.098667 159.232a32.853333 32.853333 0 0 1 0 55.552z',
fill: 'currentColor',
}),
],
),
])
},
},
'app-video-pause': {
iconReader: () => {
return h('i', [
h(
'svg',
{
style: { height: '100%', width: '100%' },
viewBox: '0 0 1024 1024',
version: '1.1',
xmlns: 'http://www.w3.org/2000/svg',
},
[
h('path', {
d: 'M405.333333 341.333333a21.333333 21.333333 0 0 0-21.333333 21.333334v298.666666a21.333333 21.333333 0 0 0 21.333333 21.333334h42.666667a21.333333 21.333333 0 0 0 21.333333-21.333334v-298.666666a21.333333 21.333333 0 0 0-21.333333-21.333334h-42.666667zM576 341.333333a21.333333 21.333333 0 0 0-21.333333 21.333334v298.666666a21.333333 21.333333 0 0 0 21.333333 21.333334h42.666667a21.333333 21.333333 0 0 0 21.333333-21.333334v-298.666666a21.333333 21.333333 0 0 0-21.333333-21.333334h-42.666667z',
fill: 'currentColor',
}),
h('path', {
d: 'M512 42.666667C252.8 42.666667 42.666667 252.8 42.666667 512s210.133333 469.333333 469.333333 469.333333 469.333333-210.133333 469.333333-469.333333S771.2 42.666667 512 42.666667zM128 512a384 384 0 1 1 768 0 384 384 0 0 1-768 0z',
fill: 'currentColor',
}),
],
),
])
},
},
}

View File

@ -42,6 +42,10 @@ router.beforeEach(
})
return
}
const p_token = to.query.token
if (p_token) {
chatUser.setToken(p_token)
}
const token = chatUser.getToken()
if (authentication) {
if (!token && to.name != 'login') {

View File

@ -47,6 +47,7 @@ import ModelAPI from '@/api/model/model'
import applicationApi from '@/api/application/application'
import DynamicsForm from '@/components/dynamics-form/index.vue'
import { useRoute } from 'vue-router'
import { MsgError } from '@/utils/message'
const route = useRoute()
const {
params: { id },
@ -60,16 +61,11 @@ const form_data = ref<any>({})
const dialogVisible = ref(false)
const loading = ref(false)
const playLoading = ref(false)
const getApi = (model_id: string, application_id?: string) => {
return application_id
? applicationApi.getModelParamsForm(application_id, model_id, loading)
: ModelAPI.getModelParamsForm(model_id, loading)
}
const open = (model_id: string, application_id?: string, model_setting_data?: any) => {
form_data.value = {}
tts_model_id.value = model_id
const api = getApi(model_id, application_id)
api.then((ok) => {
ModelAPI.getModelParamsForm(model_id, loading).then((ok) => {
model_form_field.value = ok.data
const resp = ok.data
.map((item: any) => ({
@ -92,8 +88,7 @@ const open = (model_id: string, application_id?: string, model_setting_data?: an
}
const reset_default = (model_id: string, application_id?: string) => {
const api = getApi(model_id, application_id)
api.then((ok) => {
ModelAPI.getModelParamsForm(model_id, loading).then((ok) => {
model_form_field.value = ok.data
const model_setting_data = ok.data
.map((item) => ({
@ -118,31 +113,31 @@ const testPlay = () => {
...form_data.value,
tts_model_id: tts_model_id.value,
}
// applicationApi
// .playDemoText(id as string, data, playLoading)
// .then(async (res: any) => {
// if (res.type === 'application/json') {
// const text = await res.text()
// MsgError(text)
// return
// }
// // Blob
// const blob = new Blob([res], { type: 'audio/mp3' })
applicationApi
.playDemoText(id as string, data, playLoading)
.then(async (res: any) => {
if (res.type === 'application/json') {
const text = await res.text()
MsgError(text)
return
}
// Blob
const blob = new Blob([res], { type: 'audio/mp3' })
// // URL
// const url = URL.createObjectURL(blob)
// URL
const url = URL.createObjectURL(blob)
// // audioPlayer DOM
// if (audioPlayer.value instanceof HTMLAudioElement) {
// audioPlayer.value.src = url
// audioPlayer.value.play() //
// } else {
// console.error('audioPlayer.value is not an instance of HTMLAudioElement')
// }
// })
// .catch((err) => {
// console.log('err: ', err)
// })
// audioPlayer DOM
if (audioPlayer.value instanceof HTMLAudioElement) {
audioPlayer.value.src = url
audioPlayer.value.play() //
} else {
console.error('audioPlayer.value is not an instance of HTMLAudioElement')
}
})
.catch((err) => {
console.log('err: ', err)
})
}
defineExpose({ open, reset_default })