feat: Integrate with Tencent SentenceRecognition model

This commit is contained in:
zhangzhanwei 2025-08-14 11:19:25 +08:00 committed by zhanweizhang7
parent 8ee9bc04eb
commit f9f475a5a0
9 changed files with 418 additions and 4 deletions

View File

@ -8570,4 +8570,82 @@ msgid "Edit user authorization status of resource"
msgstr ""
msgid "Get user authorization status of resource by page"
msgstr ""
msgid "Obtain resource authorization list by page"
msgstr ""
msgid "Engine model type"
msgstr ""
msgid "If not passed, the default value is 16k_zh (Chinese universal)"
msgstr ""
msgid "Chinese telephone universal"
msgstr ""
msgid "English telephone universal"
msgstr ""
msgid "Commonly used in Chinese"
msgstr ""
msgid "Chinese, English, and Guangdong"
msgstr ""
msgid "Chinese medical"
msgstr ""
msgid "English"
msgstr ""
msgid "Cantonese"
msgstr ""
msgid "Japanese"
msgstr ""
msgid "Korean"
msgstr ""
msgid "Vietnamese"
msgstr ""
msgid "Malay language"
msgstr ""
msgid "Indonesian language"
msgstr ""
msgid "Filipino language"
msgstr ""
msgid "Thai"
msgstr ""
msgid "Portuguese"
msgstr ""
msgid "Turkish"
msgstr ""
msgid "Arabic"
msgstr ""
msgid "Spanish"
msgstr ""
msgid "Hindi"
msgstr ""
msgid "French"
msgstr ""
msgid "German"
msgstr ""
msgid "Multiple dialects, supporting 23 dialects"
msgstr ""
msgid "This interface is used to recognize short audio files within 60 seconds. Supports Mandarin Chinese, English, Cantonese, Japanese, Vietnamese, Malay, Indonesian, Filipino, Thai, Portuguese, Turkish, Arabic, Hindi, French, German, and 23 Chinese dialects."
msgstr ""

View File

@ -8696,4 +8696,82 @@ msgid "Edit user authorization status of resource"
msgstr "修改资源对用户的授权状态"
msgid "Get user authorization status of resource by page"
msgstr "分页获取资源对用户的授权状态"
msgstr "分页获取资源对用户的授权状态"
msgid "Obtain resource authorization list by page"
msgstr "分页获取资源授权列表"
msgid "Engine model type"
msgstr "引擎模型类型"
msgid "If not passed, the default value is 16k_zh (Chinese universal)"
msgstr "如果未传递,默认值为 16k_zh(中文通用)"
msgid "Chinese telephone universal"
msgstr "中文电话通用"
msgid "English telephone universal"
msgstr "英文电话通用"
msgid "Commonly used in Chinese"
msgstr "中文常用"
msgid "Chinese, English, and Guangdong"
msgstr "中文、英文和广东话"
msgid "Chinese medical"
msgstr "中文医疗"
msgid "English"
msgstr "英文"
msgid "Cantonese"
msgstr "粤语"
msgid "Japanese"
msgstr "日语"
msgid "Korean"
msgstr "韩语"
msgid "Vietnamese"
msgstr "越南语"
msgid "Malay language"
msgstr "马来语"
msgid "Indonesian language"
msgstr "印尼语"
msgid "Filipino language"
msgstr "菲律宾语"
msgid "Thai"
msgstr "泰语"
msgid "Portuguese"
msgstr "葡萄牙语"
msgid "Turkish"
msgstr "土耳其语"
msgid "Arabic"
msgstr "阿拉伯语"
msgid "Spanish"
msgstr "西班牙语"
msgid "Hindi"
msgstr "印地语"
msgid "French"
msgstr "法语"
msgid "German"
msgstr "德语"
msgid "Multiple dialects, supporting 23 dialects"
msgstr "多种方言,支持 23 种方言"
msgid "This interface is used to recognize short audio files within 60 seconds. Supports Mandarin Chinese, English, Cantonese, Japanese, Vietnamese, Malay, Indonesian, Filipino, Thai, Portuguese, Turkish, Arabic, Hindi, French, German, and 23 Chinese dialects."
msgstr "本接口用于识别 60 秒之内的短音频文件。支持中文普通话、英语、粤语、日语、越南语、马来语、印度尼西亚语、菲律宾语、泰语、葡萄牙语、土耳其语、阿拉伯语、印地语、法语、德语及 23 种汉语方言。"

View File

@ -8696,4 +8696,82 @@ msgid "Edit user authorization status of resource"
msgstr "修改資源對用戶的授權狀態"
msgid "Get user authorization status of resource by page"
msgstr "分頁獲取資源對用戶的授權狀態"
msgstr "分頁獲取資源對用戶的授權狀態"
msgid "Obtain resource authorization list by page"
msgstr "分頁獲取資源授權清單"
msgid "Engine model type"
msgstr "引擎模型類型"
msgid "If not passed, the default value is 16k_zh (Chinese universal)"
msgstr "如果未傳遞,默認值為 16k_zh(中文通用)"
msgid "Chinese telephone universal"
msgstr "中文電話通用"
msgid "English telephone universal"
msgstr "英文電話通用"
msgid "Commonly used in Chinese"
msgstr "中文常用"
msgid "Chinese, English, and Guangdong"
msgstr "中文、英文和廣東話"
msgid "Chinese medical"
msgstr "中文醫療"
msgid "English"
msgstr "英文"
msgid "Cantonese"
msgstr "粵語"
msgid "Japanese"
msgstr "日語"
msgid "Korean"
msgstr "韓語"
msgid "Vietnamese"
msgstr "越南語"
msgid "Malay language"
msgstr "馬來語"
msgid "Indonesian language"
msgstr "印尼語"
msgid "Filipino language"
msgstr "菲律賓語"
msgid "Thai"
msgstr "泰語"
msgid "Portuguese"
msgstr "葡萄牙語"
msgid "Turkish"
msgstr "土耳其語"
msgid "Arabic"
msgstr "阿拉伯語"
msgid "Spanish"
msgstr "西班牙語"
msgid "Hindi"
msgstr "印地語"
msgid "French"
msgstr "法語"
msgid "German"
msgstr "德語"
msgid "Multiple dialects, supporting 23 dialects"
msgstr "多種方言,支持 23 種方言"
msgid "This interface is used to recognize short audio files within 60 seconds. Supports Mandarin Chinese, English, Cantonese, Japanese, Vietnamese, Malay, Indonesian, Filipino, Thai, Portuguese, Turkish, Arabic, Hindi, French, German, and 23 Chinese dialects."
msgstr "本介面用於識別 60 秒之內的短音頻文件。支援中文普通話、英語、粵語、日語、越南語、馬來語、印度尼西亞語、菲律賓語、泰語、葡萄牙語、土耳其語、阿拉伯語、印地語、法語、德語及 23 種漢語方言。"

View File

@ -0,0 +1,90 @@
import traceback
from common import forms
from common.exception.app_exception import AppApiException
from common.forms import BaseForm, TooltipLabel
from django.utils.translation import gettext_lazy as _, gettext
from models_provider.base_model_provider import BaseModelCredential, ValidCode
# (engine value, translated label) pairs, listed in the order they are shown.
_ENGINE_MODEL_CHOICES = (
    ("8k_zh", _("Chinese telephone universal")),
    ("8k_en", _("English telephone universal")),
    ("16k_zh", _("Commonly used in Chinese")),
    ("16k_zh-PY", _("Chinese, English, and Guangdong")),
    ("16k_zh_medical", _("Chinese medical")),
    ("16k_en", _("English")),
    ("16k_yue", _("Cantonese")),
    ("16k_ja", _("Japanese")),
    ("16k_ko", _("Korean")),
    ("16k_vi", _("Vietnamese")),
    ("16k_ms", _("Malay language")),
    ("16k_id", _("Indonesian language")),
    ("16k_fil", _("Filipino language")),
    ("16k_th", _("Thai")),
    ("16k_pt", _("Portuguese")),
    ("16k_tr", _("Turkish")),
    ("16k_ar", _("Arabic")),
    ("16k_es", _("Spanish")),
    ("16k_hi", _("Hindi")),
    ("16k_fr", _("French")),
    ("16k_de", _("German")),
    ("16k_zh_dialect", _("Multiple dialects, supporting 23 dialects")),
)


class TencentSSTModelParams(BaseForm):
    """Parameter form for the Tencent speech-to-text model.

    Exposes a single required select field, ``EngSerViceType``, whose
    default value is ``16k_zh``.
    """

    EngSerViceType = forms.SingleSelect(
        TooltipLabel(
            _('Engine model type'),
            _('If not passed, the default value is 16k_zh (Chinese universal)'),
        ),
        required=True,
        default_value='16k_zh',
        # Expand the table above into the {"value", "label"} dicts the select expects.
        option_list=[
            {"value": engine, "label": caption}
            for engine, caption in _ENGINE_MODEL_CHOICES
        ],
        value_field='value',
        text_field='label',
    )
class TencentSTTModelCredential(BaseForm, BaseModelCredential):
    """Credential form and validator for the Tencent speech-to-text model.

    The form collects ``SecretId`` and ``SecretKey``; ``is_valid`` checks the
    model type, the presence of both keys, and finally calls ``check_auth``
    on a model instance built from the credential.
    """

    # Keys that must be present in the submitted credential dict.
    REQUIRED_FIELDS = ["SecretId", "SecretKey"]

    SecretId = forms.PasswordInputField('SecretId', required=True)
    SecretKey = forms.PasswordInputField('SecretKey', required=True)

    @classmethod
    def _validate_model_type(cls, model_type, provider, raise_exception=False):
        """Return True when ``model_type`` is one of the provider's model types.

        When it is not, raise ``AppApiException`` if ``raise_exception`` is
        set, otherwise return False.
        """
        for entry in provider.get_model_type_list():
            if entry['value'] == model_type:
                return True
        if raise_exception:
            raise AppApiException(
                ValidCode.valid_error.value,
                gettext('{model_type} Model type is not supported').format(model_type=model_type))
        return False

    @classmethod
    def _validate_credential_fields(cls, model_credential, raise_exception=False):
        """Return True when every key in ``REQUIRED_FIELDS`` is in ``model_credential``.

        When keys are missing, raise ``AppApiException`` naming them if
        ``raise_exception`` is set, otherwise return False.
        """
        absent = []
        for name in cls.REQUIRED_FIELDS:
            if name not in model_credential:
                absent.append(name)
        if not absent:
            return True
        if raise_exception:
            raise AppApiException(
                ValidCode.valid_error.value,
                gettext('{keys} is required').format(keys=", ".join(absent)))
        return False

    def is_valid(self, model_type, model_name, model_credential, model_params, provider, raise_exception=False):
        """Validate the credential by building the model and calling ``check_auth``.

        Returns True on success. On any failure returns False, or raises
        ``AppApiException`` when ``raise_exception`` is set.
        """
        if not self._validate_model_type(model_type, provider, raise_exception):
            return False
        if not self._validate_credential_fields(model_credential, raise_exception):
            return False
        try:
            candidate = provider.get_model(model_type, model_name, model_credential, **model_params)
            candidate.check_auth()
        except Exception as e:
            # Always dump the stack trace, whether or not the error is re-raised.
            traceback.print_exc()
            if not raise_exception:
                return False
            message = gettext(
                'Verification failed, please check whether the parameters are correct: {error}').format(
                error=str(e))
            raise AppApiException(ValidCode.valid_error.value, message)
        return True

    def encryption_dict(self, model):
        """Return a copy of ``model`` with the ``SecretKey`` value passed through ``encryption``."""
        masked = dict(model)
        masked['SecretKey'] = super().encryption(model.get('SecretKey', ''))
        return masked

    def get_model_params_setting_form(self, model_name):
        """Return the parameter form for this model; ``model_name`` is not used."""
        return TencentSSTModelParams()

View File

@ -0,0 +1,80 @@
import base64
import json
import os
from typing import Dict
from tencentcloud.asr.v20190614 import asr_client, models
from tencentcloud.common import credential
from tencentcloud.common.exception import TencentCloudSDKException
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from models_provider.base_model_provider import MaxKBBaseModel
from models_provider.impl.base_stt import BaseSpeechToText
class TencentSpeechToText(MaxKBBaseModel, BaseSpeechToText):
    """Speech-to-text model backed by Tencent Cloud ASR ``SentenceRecognition``."""

    hunyuan_secret_id: str
    hunyuan_secret_key: str
    model: str
    params: dict

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.hunyuan_secret_id = kwargs.get('hunyuan_secret_id')
        self.hunyuan_secret_key = kwargs.get('hunyuan_secret_key')
        self.model = kwargs.get('model')
        self.params = kwargs.get('params')

    @staticmethod
    def is_cache_model():
        """Report that instances of this model are not cached."""
        return False

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Build an instance from a credential dict and the model parameters.

        ``model_kwargs`` is passed both as ``params`` and as individual
        keyword arguments.
        """
        return TencentSpeechToText(
            hunyuan_secret_id=model_credential.get('SecretId'),
            hunyuan_secret_key=model_credential.get('SecretKey'),
            model=model_name,
            params=model_kwargs,
            **model_kwargs
        )

    def check_auth(self):
        """Verify the credential by recognizing the sample audio shipped next to this module.

        Raises:
            TencentCloudSDKException: when the request is rejected (for
                example invalid SecretId/SecretKey). The error must propagate
                so that credential validation can actually fail.
        """
        cwd = os.path.dirname(os.path.abspath(__file__))
        with open(f'{cwd}/iat_mp3_16k.mp3', 'rb') as f:
            # Call the raising helper directly: speech_to_text() swallows SDK
            # errors, which would make every credential look valid.
            self._recognize(f)

    def _recognize(self, audio_file):
        """Send ``audio_file`` to SentenceRecognition and return the recognized text.

        Args:
            audio_file: binary file-like object; its whole content is read,
                base64-encoded and sent inline as mp3 data.

        Raises:
            TencentCloudSDKException: on any SDK/API failure.
        """
        cred = credential.Credential(self.hunyuan_secret_id, self.hunyuan_secret_key)
        # Optional HTTP profile; only the endpoint is customized.
        http_profile = HttpProfile()
        http_profile.endpoint = "asr.tencentcloudapi.com"
        # Optional client profile wrapping the HTTP profile.
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        # Client for the ASR product; no region is specified.
        client = asr_client.AsrClient(cred, "", client_profile)
        encoded_audio = base64.b64encode(audio_file.read()).decode()
        # Fall back to the documented default engine when the parameter is
        # missing or empty instead of sending null to the API.
        engine_type = (self.params or {}).get('EngSerViceType') or '16k_zh'
        # Every API action has a matching request object.
        req = models.SentenceRecognitionRequest()
        payload = {
            "EngSerViceType": engine_type,
            "SourceType": 1,
            "VoiceFormat": "mp3",
            "Data": encoded_audio,
        }
        req.from_json_string(json.dumps(payload))
        # The response is a SentenceRecognitionResponse matching the request.
        resp = client.SentenceRecognition(req)
        return resp.Result

    def speech_to_text(self, audio_file):
        """Recognize ``audio_file`` and return the text, or None on an SDK error.

        SDK errors are printed and swallowed here (best-effort behaviour kept
        for existing callers); use ``check_auth`` when failures must surface.
        """
        try:
            return self._recognize(audio_file)
        except TencentCloudSDKException as err:
            print(err)
            return None

View File

@ -9,10 +9,12 @@ from models_provider.base_model_provider import (
from models_provider.impl.tencent_model_provider.credential.embedding import TencentEmbeddingCredential
from models_provider.impl.tencent_model_provider.credential.image import TencentVisionModelCredential
from models_provider.impl.tencent_model_provider.credential.llm import TencentLLMModelCredential
from models_provider.impl.tencent_model_provider.credential.stt import TencentSTTModelCredential
from models_provider.impl.tencent_model_provider.credential.tti import TencentTTIModelCredential
from models_provider.impl.tencent_model_provider.model.embedding import TencentEmbeddingModel
from models_provider.impl.tencent_model_provider.model.image import TencentVision
from models_provider.impl.tencent_model_provider.model.llm import TencentModel
from models_provider.impl.tencent_model_provider.model.stt import TencentSpeechToText
from models_provider.impl.tencent_model_provider.model.tti import TencentTextToImageModel
from maxkb.conf import PROJECT_DIR
from django.utils.translation import gettext as _
@ -70,6 +72,12 @@ def _initialize_model_info():
ModelTypeConst.LLM,
TencentLLMModelCredential,
TencentModel),
_create_model_info(
'asr-sentence',
_("This interface is used to recognize short audio files within 60 seconds. Supports Mandarin Chinese, English, Cantonese, Japanese, Vietnamese, Malay, Indonesian, Filipino, Thai, Portuguese, Turkish, Arabic, Hindi, French, German, and 23 Chinese dialects."),
ModelTypeConst.STT,
TencentSTTModelCredential,
TencentSpeechToText),
]
tencent_embedding_model_info = _create_model_info(

View File

@ -107,7 +107,7 @@ def is_valid_credential(provider, model_type, model_name, model_credential: Dict
def get_model_by_id(_id, workspace_id):
model = QuerySet(Model).filter(id=_id).first()
get_authorized_model = DatabaseModelManage.get_model("get_authorized_model")
if model and model.workspace_id!=workspace_id and get_authorized_model is not None:
if model and model.workspace_id != workspace_id and get_authorized_model is not None:
model = get_authorized_model(QuerySet(Model).filter(id=_id), workspace_id).first()
if model is None:
raise Exception(_("Model does not exist"))
@ -122,4 +122,5 @@ def get_model_instance_by_model_workspace_id(model_id, workspace_id, **kwargs):
@return: 模型实例
"""
model = get_model_by_id(model_id, workspace_id)
return ModelManage.get_model(model_id, lambda _id: get_model(model, **kwargs))
s = {p.get('field'): p.get('default_value') for p in model.model_params_form if p.get('default_value') is not None}
return ModelManage.get_model(model_id, lambda _id: get_model(model, **{**s, **kwargs}))

View File

@ -76,6 +76,7 @@ websockets = "15.0.1"
# 开发工具
pylint = "3.3.7"
tencentcloud-sdk-python-asr = "^3.0.1416"
[build-system]