feat: Support stt model params setting

2025-12-26 01:33:05 +00:00 · 2025-09-19 10:12:53 +08:00 · 2025-09-19 10:12:53 +08:00 · 71cec2fca4
parent 8d9c466ced
commit 71cec2fca4
11 changed files with 238 additions and 15 deletions
--- a/apps/application/migrations/0003_application_stt_model_params_setting_and_more.py
+++ b/apps/application/migrations/0003_application_stt_model_params_setting_and_more.py
@ -0,0 +1,23 @@
+# Generated by Django 5.2.4 on 2025-09-16 08:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add ``stt_model_params_setting`` to the ``application`` and ``applicationversion`` models."""
+
+    # This migration is ordered after the one it names here.
+    dependencies = [
+        ('application', '0002_application_simple_mcp'),
+    ]
+
+    # Both operations add the same JSON field; ``default=dict`` makes the
+    # field default to an empty dict.
+    operations = [
+        migrations.AddField(
+            model_name='application',
+            name='stt_model_params_setting',
+            field=models.JSONField(default=dict, verbose_name='STT模型参数相关设置'),
+        ),
+        migrations.AddField(
+            model_name='applicationversion',
+            name='stt_model_params_setting',
+            field=models.JSONField(default=dict, verbose_name='STT模型参数相关设置'),
+        ),
+    ]
--- a/apps/application/models/application.py
+++ b/apps/application/models/application.py
@ -72,6 +72,7 @@ class Application(AppModelMixin):
    model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
    model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
    tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
+    stt_model_params_setting = models.JSONField(verbose_name="STT模型参数相关设置", default=dict)
    problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
    icon = models.CharField(max_length=256, verbose_name="应用icon", default="./favicon.ico")
    work_flow = models.JSONField(verbose_name="工作流数据", default=dict)
@ -145,6 +146,7 @@ class ApplicationVersion(AppModelMixin):
    model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
    model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
    tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
+    stt_model_params_setting = models.JSONField(verbose_name="STT模型参数相关设置", default=dict)
    problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
    icon = models.CharField(max_length=256, verbose_name="应用icon", default="./favicon.ico")
    work_flow = models.JSONField(verbose_name="工作流数据", default=dict)
--- a/apps/application/serializers/application.py
+++ b/apps/application/serializers/application.py
@ -700,6 +700,7 @@ class ApplicationOperateSerializer(serializers.Serializer):
            'user_id': 'user_id', 'model_id': 'model_id', 'knowledge_setting': 'knowledge_setting',
            'model_setting': 'model_setting', 'model_params_setting': 'model_params_setting',
            'tts_model_params_setting': 'tts_model_params_setting',
+            'stt_model_params_setting': 'stt_model_params_setting',
            'problem_optimization': 'problem_optimization', 'icon': 'icon', 'work_flow': 'work_flow',
            'problem_optimization_prompt': 'problem_optimization_prompt', 'tts_model_id': 'tts_model_id',
            'stt_model_id': 'stt_model_id', 'tts_model_enable': 'tts_model_enable',
@ -785,6 +786,8 @@ class ApplicationOperateSerializer(serializers.Serializer):
                    instance['stt_autosend'] = node_data['stt_autosend']
                if 'tts_model_params_setting' in node_data:
                    instance['tts_model_params_setting'] = node_data['tts_model_params_setting']
+                if 'stt_model_params_setting' in node_data:
+                    instance['stt_model_params_setting'] = node_data['stt_model_params_setting']
                if 'file_upload_enable' in node_data:
                    instance['file_upload_enable'] = node_data['file_upload_enable']
                if 'file_upload_setting' in node_data:
@ -830,7 +833,7 @@ class ApplicationOperateSerializer(serializers.Serializer):
                       'knowledge_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
                       'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type',
                       'tts_autoplay', 'stt_autosend', 'file_upload_enable', 'file_upload_setting',
-                       'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting',
+                       'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting', 'stt_model_params_setting',
                       'mcp_enable', 'mcp_tool_ids', 'mcp_servers', 'mcp_source', 'tool_enable', 'tool_ids', 'mcp_output_enable',
                       'problem_optimization_prompt', 'clean_time', 'folder_id']
        for update_key in update_keys:
--- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/asr_stt.py
+++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/asr_stt.py
@ -60,6 +60,7 @@ class AliyunBaiLianAsrSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                model=self.model,
                messages=messages,
                result_format="message",
+                **self.params
            )
            if response.status_code == 200:
                text = response["output"]["choices"][0]["message"].content[0]["text"]
--- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py
+++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py
@ -77,6 +77,7 @@ class AliyunBaiLianOmiSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                # stream 必须设置为 True，否则会报错
                stream=True,
                stream_options={"include_usage": True},
+                extra_body=self.params
            )
            result = []
            for chunk in completion:
--- a/apps/models_provider/impl/tencent_model_provider/model/stt.py
+++ b/apps/models_provider/impl/tencent_model_provider/model/stt.py
@ -69,6 +69,7 @@ class TencentSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                "SourceType": 1,
                "VoiceFormat": "mp3",
                "Data": _v.decode(),
+                **self.params
            }
            req.from_json_string(json.dumps(params))

--- a/apps/models_provider/impl/xf_model_provider/model/zh_en_stt.py
+++ b/apps/models_provider/impl/xf_model_provider/model/zh_en_stt.py
@ -22,11 +22,25 @@ ssl_context.check_hostname = False
 ssl_context.verify_mode = ssl.CERT_NONE


+def deep_merge_dict(target_dict, source_dict):
+    """Recursively overlay ``source_dict`` on a copy of ``target_dict``.
+
+    Where both sides hold a dict under the same key the two are merged
+    recursively; otherwise the value from ``source_dict`` wins. A non-dict
+    ``source_dict`` is returned unchanged, and a non-dict ``target_dict`` is
+    treated as empty. ``target_dict`` itself is never modified, although
+    nested values that are not overridden are shared with it, not copied.
+    """
+    if not isinstance(source_dict, dict):
+        return source_dict
+    merged = target_dict.copy() if isinstance(target_dict, dict) else {}
+    for name, incoming in source_dict.items():
+        existing = merged.get(name)
+        both_dicts = isinstance(existing, dict) and isinstance(incoming, dict)
+        merged[name] = deep_merge_dict(existing, incoming) if both_dicts else incoming
+    return merged
+
+
 class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
    spark_app_id: str
    spark_api_key: str
    spark_api_secret: str
    spark_api_url: str
+    params: dict

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
@ -34,6 +48,7 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
        self.spark_app_id = kwargs.get('spark_app_id')
        self.spark_api_key = kwargs.get('spark_api_key')
        self.spark_api_secret = kwargs.get('spark_api_secret')
+        self.params = kwargs.get('params')

    @staticmethod
    def is_cache_model():
@ -41,17 +56,14 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build an ``XFZhEnSparkSpeechToText`` from the given credential mapping.
+
+        ``model_kwargs`` is handed over twice: once whole as the ``params``
+        dict and once expanded into individual keyword arguments.
+
+        NOTE(review): a ``params`` key inside ``model_kwargs`` would collide
+        with the explicit ``params=`` keyword and raise ``TypeError`` —
+        confirm callers never send one.
+        """
-        optional_params = {}
-        if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
-            optional_params['max_tokens'] = model_kwargs['max_tokens']
-        if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
-            optional_params['temperature'] = model_kwargs['temperature']
+
        return XFZhEnSparkSpeechToText(
            spark_app_id=model_credential.get('spark_app_id'),
            spark_api_key=model_credential.get('spark_api_key'),
            spark_api_secret=model_credential.get('spark_api_secret'),
            spark_api_url=model_credential.get('spark_api_url'),
-            **optional_params
+            params=model_kwargs,
+            **model_kwargs
        )

    # 生成url
@ -106,6 +118,10 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
            maxkb_logger.error(f"语音识别错误: {str(err)}: {traceback.format_exc()}")
            return ""

+    def merge_params_to_frame(self, frame, params):
+        """Return ``frame`` with ``params`` deep-merged on top via ``deep_merge_dict``."""
+        merged_frame = deep_merge_dict(frame, params)
+        return merged_frame
+
    async def send_audio(self, ws, audio_file):
        """发送音频数据"""
        chunk_size = 4000
@ -123,8 +139,11 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                    "header": {"app_id": self.spark_app_id, "status": 0},
                    "parameter": {
                        "iat": {
-                            "domain": "slm", "language": "zh_cn", "accent": "mandarin",
-                            "eos": 10000, "vinfo": 1,
+                            "domain": "slm",
+                            "language": "zh_cn",
+                            "accent": "mandarin",
+                            "eos": 10000,
+                            "vinfo": 1,
                            "result": {"encoding": "utf8", "compress": "raw", "format": "json"}
                        }
                    },
@ -135,6 +154,9 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                        }
                    }
                }
+                frame = self.merge_params_to_frame(frame,{key: value for key, value in self.params.items() if
+                                            not ['model_id', 'use_local', 'streaming'].__contains__(key)})
+
            # 中间帧
            else:
                frame = {
@ -147,6 +169,9 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                    }
                }

+                frame = self.merge_params_to_frame(frame,{key: value for key, value in self.params.items() if
+                                            not ['model_id', 'use_local', 'streaming','parameter'].__contains__(key)})
+
            await ws.send(json.dumps(frame))
            seq += 1

@ -160,17 +185,19 @@ class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
                }
            }
        }
+
+        end_frame = self.merge_params_to_frame(end_frame,{key: value for key, value in self.params.items() if
+                                            not ['model_id', 'use_local', 'streaming','parameter'].__contains__(key)})
+
        await ws.send(json.dumps(end_frame))

-
-# 接受信息处理器
+    # 接受信息处理器
    async def handle_message(self, ws):
        result_text = ""
        while True:
            try:
                message = await asyncio.wait_for(ws.recv(), timeout=30.0)
                data = json.loads(message)
-
                if data['header']['code'] != 0:
                    raise Exception("")

--- a/ui/src/api/type/application.ts
+++ b/ui/src/api/type/application.ts
@ -17,6 +17,7 @@ interface ApplicationFormType {
  work_flow?: any
  model_params_setting?: any
  tts_model_params_setting?: any
+  stt_model_params_setting?: any
  stt_model_id?: string
  tts_model_id?: string
  stt_model_enable?: boolean
--- a/ui/src/views/application/ApplicationSetting.vue
+++ b/ui/src/views/application/ApplicationSetting.vue
@ -482,14 +482,28 @@
                      </div>
                    </div>
                  </template>
-                  <ModelSelect
+                  <div class="flex-between w-full">
+                    <ModelSelect
                    v-show="applicationForm.stt_model_enable"
                    v-model="applicationForm.stt_model_id"
                    :placeholder="$t('views.application.form.voiceInput.placeholder')"
                    :options="sttModelOptions"
+                    @change="sttModelChange"
                    :model-type="'STT'"
-                  >
-                  </ModelSelect>
+                    >
+                    </ModelSelect>
+
+                    <el-button
+                      v-if="applicationForm.stt_model_enable"
+                      @click="openSTTParamSettingDialog"
+                      :disabled="!applicationForm.stt_model_id"
+                      class="ml-8"
+                    >
+                      <el-icon>
+                        <Operation />
+                      </el-icon>
+                    </el-button>
+                  </div>
                </el-form-item>
                <el-form-item
                  prop="tts_model_id"
@ -583,6 +597,7 @@
    <AIModeParamSettingDialog ref="AIModeParamSettingDialogRef" @refresh="refreshForm" />
    <GeneratePromptDialog @replace="replace" ref="GeneratePromptDialogRef" />
    <TTSModeParamSettingDialog ref="TTSModeParamSettingDialogRef" @refresh="refreshTTSForm" />
+    <STTModeParamSettingDialog ref="STTModeParamSettingDialogRef" @refresh="refreshSTTForm" />
    <ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" />
    <AddKnowledgeDialog
      ref="AddKnowledgeDialogRef"
@ -612,6 +627,7 @@ import { relatedObject } from '@/utils/array'
 import { MsgSuccess, MsgWarning } from '@/utils/message'
 import { t } from '@/locales'
 import TTSModeParamSettingDialog from './component/TTSModeParamSettingDialog.vue'
+import STTModeParamSettingDialog from './component/STTModelParamSettingDialog.vue'
 import ReasoningParamSettingDialog from './component/ReasoningParamSettingDialog.vue'
 import permissionMap from '@/permission'
 import { EditionConst } from '@/utils/permission/data'
@ -652,6 +668,7 @@ const optimizationPrompt =
 const AIModeParamSettingDialogRef = ref<InstanceType<typeof AIModeParamSettingDialog>>()
 const ReasoningParamSettingDialogRef = ref<InstanceType<typeof ReasoningParamSettingDialog>>()
 const TTSModeParamSettingDialogRef = ref<InstanceType<typeof TTSModeParamSettingDialog>>()
+const STTModeParamSettingDialogRef = ref<InstanceType<typeof STTModeParamSettingDialog>>()  
 const ParamSettingDialogRef = ref<InstanceType<typeof ParamSettingDialog>>()
 const GeneratePromptDialogRef = ref<InstanceType<typeof GeneratePromptDialog>>()

@ -756,6 +773,7 @@ const submit = async (formEl: FormInstance | undefined) => {
  if (!formEl) return
  await formEl.validate((valid, fields) => {
    if (valid) {
+      console.log(applicationForm.value)
      loadSharedApi({ type: 'application', systemType: apiType.value })
        .putApplication(id, applicationForm.value, loading)
        .then(() => {
@ -806,6 +824,17 @@ const openTTSParamSettingDialog = () => {
  }
 }

+/** Open the STT parameter dialog for the selected STT model; does nothing when none is selected. */
+const openSTTParamSettingDialog = () => {
+  const { stt_model_id, stt_model_params_setting } = applicationForm.value
+  if (!stt_model_id) {
+    return
+  }
+  STTModeParamSettingDialogRef.value?.open(stt_model_id, id, stt_model_params_setting)
+}
+
+
 const openParamSettingDialog = () => {
  ParamSettingDialogRef.value?.open(applicationForm.value)
 }
@ -905,6 +934,10 @@ function refreshTTSForm(data: any) {
  applicationForm.value.tts_model_params_setting = data
 }

+/** Store the given STT parameter values on the application form (used as the STT dialog's `refresh` handler). */
+function refreshSTTForm(data: any) {
+  applicationForm.value.stt_model_params_setting = data
+}
+
 function removeKnowledge(id: any) {
  if (applicationForm.value.knowledge_id_list) {
    applicationForm.value.knowledge_id_list.splice(
@ -1022,6 +1055,14 @@ function ttsModelChange() {
  }
 }

+/** On STT model change: ask the dialog to reset parameters for the new model, or clear them when no model is set. */
+function sttModelChange() {
+  const selectedModelId = applicationForm.value.stt_model_id
+  if (!selectedModelId) {
+    refreshSTTForm({})
+    return
+  }
+  STTModeParamSettingDialogRef.value?.reset_default(selectedModelId, id)
+}
+
 function ttsModelEnableChange() {
  if (!applicationForm.value.tts_model_enable) {
    applicationForm.value.tts_model_id = undefined
--- a/ui/src/views/application/component/STTModelParamSettingDialog.vue
+++ b/ui/src/views/application/component/STTModelParamSettingDialog.vue
@ -0,0 +1,122 @@
+<template>
+    <el-dialog
+      align-center
+      :title="$t('common.paramSetting')"
+      v-model="dialogVisible"
+      style="width: 550px"
+      append-to-body
+      :close-on-click-modal="false"
+      :close-on-press-escape="false"
+    >
+    <DynamicsForm
+      v-model="form_data"
+      :model="form_data"      
+      label-position="top"
+      require-asterisk-position="right"
+      :render_data="model_form_field"
+      ref="dynamicsFormRef"
+    >
+    </DynamicsForm>
+
+    <template #footer>
+      <div class="flex-between">
+        <span class="dialog-footer">
+          <el-button @click.prevent="dialogVisible = false">
+            {{ $t('common.cancel') }}
+          </el-button>
+          <el-button type="primary" @click="submit" :loading="loading">
+            {{ $t('common.confirm') }}
+          </el-button>
+        </span>
+      </div>
+    </template>
+    </el-dialog>
+</template>
+
+
+
+<script setup lang="ts">
+import type { FormField } from '@/components/dynamics-form/type'
+import { loadSharedApi } from '@/utils/dynamics-api/shared-api'
+import DynamicsForm from '@/components/dynamics-form/index.vue'
+import { ref, computed } from 'vue'
+import { useRoute } from 'vue-router'
+
+
+const route = useRoute()
+
+const {
+  params: { id }
+} = route as any
+// API scope: 'systemManage' when the route path contains 'resource-management', otherwise 'workspace'.
+const apiType = computed(() =>
+  route.path.includes('resource-management') ? 'systemManage' : 'workspace'
+)
+
+const dialogVisible = ref<boolean>(false)
+const form_data = ref<any>({})
+const dynamicsFormRef = ref<InstanceType<typeof DynamicsForm>>()
+const stt_model_id = ref<string>('')
+const loading = ref<boolean>(false)
+const model_form_field = ref<Array<FormField>>([])
+const emit = defineEmits(['refresh'])
+
+/**
+ * Show the dialog and render the parameter form for `model_id`.
+ *
+ * Saved values in `model_setting_data` are overlaid on the model's default
+ * values; keys the model's form does not declare are ignored. The caller's
+ * object is left untouched, so cancelling the dialog changes nothing outside it.
+ */
+const open = (model_id: string, application_id?: string, model_setting_data?: any) => {
+  form_data.value = {}
+  stt_model_id.value = model_id
+  loadSharedApi({ type: 'model', systemType: apiType.value })
+    .getModelParamsForm(model_id, loading)
+    .then((ok: any) => {
+      model_form_field.value = ok.data
+      // Default value for every field the model declares.
+      const defaults = ok.data
+        .map((item: any) => ({
+          [item.field]: item.show_default_value !== false ? item.default_value : undefined,
+        }))
+        .reduce((x: any, y: any) => ({ ...x, ...y }), {})
+
+      // Keep only saved values whose field still exists. Copy them rather than
+      // deleting keys on the object passed in by the parent form.
+      const saved: any = {}
+      Object.keys(model_setting_data ?? {}).forEach((key) => {
+        if (key in defaults) {
+          saved[key] = model_setting_data[key]
+        }
+      })
+      // Render the dynamic form
+      dynamicsFormRef.value?.render(model_form_field.value, { ...defaults, ...saved })
+    })
+  dialogVisible.value = true
+}
+
+// Validate the dynamic form; on success emit its values via `refresh` and close the dialog.
+const submit = async () => {
+  const dynamicsForm = dynamicsFormRef.value
+  if (!dynamicsForm) {
+    return
+  }
+  dynamicsForm.validate().then(() => {
+    emit('refresh', form_data.value)
+    dialogVisible.value = false
+  })
+}
+
+// Fetch the parameter form for `model_id` and emit its default values via `refresh`.
+const reset_default = (model_id: string, application_id?: string) => {
+  loadSharedApi({ type: 'model', systemType: apiType.value })
+    .getModelParamsForm(model_id, loading)
+    .then((ok: any) => {
+      model_form_field.value = ok.data
+      const defaults: any = {}
+      for (const item of ok.data) {
+        // Fields flagged `show_default_value === false` get no default.
+        defaults[item.field] = item.show_default_value !== false ? item.default_value : undefined
+      }
+      emit('refresh', defaults)
+    })
+}
+
+
+
+defineExpose({ open, reset_default })
+
+</script>
+
+<style lang="scss" scoped></style>
--- a/ui/src/views/model/component/ModelCard.vue
+++ b/ui/src/views/model/component/ModelCard.vue
@ -90,6 +90,7 @@
            <el-dropdown-item
              v-if="
                (currentModel.model_type === 'TTS' ||
+                  currentModel.model_type === 'STT' ||
                  currentModel.model_type === 'LLM' ||
                  currentModel.model_type === 'IMAGE' ||
                  currentModel.model_type === 'TTI' ||