fix: tti model (#2060)

2025-12-28 23:32:48 +00:00 · 2025-01-21 17:50:01 +08:00 · 2025-01-21 17:50:01 +08:00 · 5009a28853
parent 7ac65aa342
commit 5009a28853
24 changed files with 47 additions and 55 deletions
--- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/llm.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-from typing import List, Dict
+from typing import Dict

 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
--- a/apps/setting/models_provider/impl/aws_bedrock_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/aws_bedrock_model_provider/model/llm.py
@@ -1,8 +1,10 @@
-from typing import List, Dict
 import os
 import re
+from typing import Dict
+
 from botocore.config import Config
 from langchain_community.chat_models import BedrockChat
+
 from setting.models_provider.base_model_provider import MaxKBBaseModel


--- a/apps/setting/models_provider/impl/azure_model_provider/model/azure_chat_model.py
+++ b/apps/setting/models_provider/impl/azure_model_provider/model/azure_chat_model.py
@@ -7,13 +7,10 @@
    @desc:
 """

-from typing import List, Dict, Optional, Any, Iterator, Type
+from typing import List, Dict

-from langchain_core.callbacks import CallbackManagerForLLMRun
-from langchain_core.messages import BaseMessage, get_buffer_string, BaseMessageChunk, AIMessageChunk
-from langchain_core.outputs import ChatGenerationChunk
+from langchain_core.messages import BaseMessage, get_buffer_string
 from langchain_openai import AzureChatOpenAI
-from langchain_openai.chat_models.base import _convert_delta_to_message_chunk

 from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
--- a/apps/setting/models_provider/impl/azure_model_provider/model/stt.py
+++ b/apps/setting/models_provider/impl/azure_model_provider/model/stt.py
@@ -1,8 +1,7 @@
-import asyncio
 import io
 from typing import Dict

-from openai import OpenAI, AzureOpenAI
+from openai import AzureOpenAI

 from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
@@ -61,4 +60,3 @@ class AzureOpenAISpeechToText(MaxKBBaseModel, BaseSpeechToText):
        buffer.name = "file.mp3"  # this is the important line
        res = client.audio.transcriptions.create(model=self.model, language="zh", file=buffer)
        return res.text
-
--- a/apps/setting/models_provider/impl/base_chat_open_ai.py
+++ b/apps/setting/models_provider/impl/base_chat_open_ai.py
@@ -1,6 +1,7 @@
 # coding=utf-8

-from typing import List, Dict, Optional, Any, Iterator, Type, cast
+from typing import List, Dict, Optional, Any, Iterator, cast
+
 from langchain_core.language_models import LanguageModelInput
 from langchain_core.messages import BaseMessage, get_buffer_string
 from langchain_core.outputs import ChatGenerationChunk, ChatGeneration
--- a/apps/setting/models_provider/impl/deepseek_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/deepseek_model_provider/model/llm.py
@@ -6,7 +6,7 @@
@Author  ：Brian Yang
@Date    ：5/12/24 7:44 AM 
 """
-from typing import List, Dict
+from typing import Dict

 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
@@ -29,4 +29,3 @@ class DeepSeekChatModel(MaxKBBaseModel, BaseChatOpenAI):
            **optional_params
        )
        return deepseek_chat_open_ai
-
--- a/apps/setting/models_provider/impl/gemini_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/gemini_model_provider/model/llm.py
@@ -11,14 +11,14 @@ from typing import List, Dict, Optional, Sequence, Union, Any, Iterator, cast
 from google.ai.generativelanguage_v1 import GenerateContentResponse
 from google.generativeai.responder import ToolDict
 from google.generativeai.types import FunctionDeclarationType, SafetySettingDict
+from google.generativeai.types import Tool as GoogleTool
 from langchain_core.callbacks import CallbackManagerForLLMRun
-from langchain_core.messages import BaseMessage, get_buffer_string
+from langchain_core.messages import BaseMessage
 from langchain_core.outputs import ChatGenerationChunk
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_google_genai._function_utils import _ToolConfigDict
 from langchain_google_genai.chat_models import _chat_with_retry, _response_to_result
-from google.generativeai.types import Tool as GoogleTool
-from common.config.tokenizer_manage_config import TokenizerManage
+
 from setting.models_provider.base_model_provider import MaxKBBaseModel


--- a/apps/setting/models_provider/impl/kimi_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/kimi_model_provider/model/llm.py
@@ -6,7 +6,7 @@
    @date：2023/11/10 17:45
    @desc:
 """
-from typing import List, Dict
+from typing import Dict

 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
--- a/apps/setting/models_provider/impl/openai_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/openai_model_provider/model/llm.py
@@ -6,11 +6,9 @@
    @date：2024/4/18 15:28
    @desc:
 """
-from typing import List, Dict, Optional, Any
+from typing import List, Dict

-from langchain_core.language_models import LanguageModelInput
 from langchain_core.messages import BaseMessage, get_buffer_string
-from langchain_core.runnables import RunnableConfig
 from langchain_openai.chat_models import ChatOpenAI

 from common.config.tokenizer_manage_config import TokenizerManage
--- a/apps/setting/models_provider/impl/qwen_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/qwen_model_provider/model/llm.py
@@ -12,11 +12,10 @@ from langchain_community.chat_models import ChatTongyi
 from langchain_community.llms.tongyi import generate_with_last_element_mark
 from langchain_core.callbacks import CallbackManagerForLLMRun
 from langchain_core.language_models import LanguageModelInput
-from langchain_core.messages import BaseMessage, get_buffer_string
+from langchain_core.messages import BaseMessage
 from langchain_core.outputs import ChatGenerationChunk, ChatGeneration
 from langchain_core.runnables import RunnableConfig, ensure_config

-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel


--- a/apps/setting/models_provider/impl/qwen_model_provider/model/tti.py
+++ b/apps/setting/models_provider/impl/qwen_model_provider/model/tti.py
@@ -3,12 +3,13 @@ from http import HTTPStatus
 from typing import Dict

 from dashscope import ImageSynthesis
+from django.utils.translation import gettext as __
 from langchain_community.chat_models import ChatTongyi
 from langchain_core.messages import HumanMessage

 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_tti import BaseTextToImage
-from django.utils.translation import gettext_lazy as _
+

 class QwenTextToImageModel(MaxKBBaseModel, BaseTextToImage):
    api_key: str
@@ -39,7 +40,7 @@ class QwenTextToImageModel(MaxKBBaseModel, BaseTextToImage):

    def check_auth(self):
        chat = ChatTongyi(api_key=self.api_key, model_name='qwen-max')
-        chat.invoke([HumanMessage([{"type": "text", "text": _('Hello')}])])
+        chat.invoke([HumanMessage([{"type": "text", "text": __('Hello')}])])

    def generate_image(self, prompt: str, negative_prompt: str = None):
        # api_base='https://dashscope.aliyuncs.com/compatible-mode/v1',
--- a/apps/setting/models_provider/impl/tencent_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/tencent_model_provider/model/llm.py
@@ -2,8 +2,8 @@

 from typing import List, Dict, Optional, Any

-from langchain_core.messages import BaseMessage, get_buffer_string
-from common.config.tokenizer_manage_config import TokenizerManage
+from langchain_core.messages import BaseMessage
+
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.tencent_model_provider.model.hunyuan import ChatHunyuan

--- a/apps/setting/models_provider/impl/tencent_model_provider/model/tti.py
+++ b/apps/setting/models_provider/impl/tencent_model_provider/model/tti.py
@@ -3,6 +3,7 @@
 import json
 from typing import Dict

+from django.utils.translation import gettext as __
 from tencentcloud.common import credential
 from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
 from tencentcloud.common.profile.client_profile import ClientProfile
@@ -12,7 +13,7 @@ from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_tti import BaseTextToImage
 from setting.models_provider.impl.tencent_model_provider.model.hunyuan import ChatHunyuan
-from django.utils.translation import gettext_lazy as _
+

 class TencentTextToImageModel(MaxKBBaseModel, BaseTextToImage):
    hunyuan_secret_id: str
@@ -50,7 +51,7 @@ class TencentTextToImageModel(MaxKBBaseModel, BaseTextToImage):
                           hunyuan_secret_id=self.hunyuan_secret_id,
                           hunyuan_secret_key=self.hunyuan_secret_key,
                           model="hunyuan-standard")
-        res = chat.invoke(_('Hello'))
+        res = chat.invoke(__('Hello'))
        # print(res)

    def generate_image(self, prompt: str, negative_prompt: str = None):
--- a/apps/setting/models_provider/impl/vllm_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/vllm_model_provider/model/llm.py
@@ -1,7 +1,8 @@
 # coding=utf-8

-from typing import List, Dict
+from typing import Dict
 from urllib.parse import urlparse, ParseResult
+
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI

--- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/stt.py
+++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/stt.py
@@ -12,14 +12,14 @@ import gzip
 import hmac
 import json
 import os
+import ssl
 import uuid
 import wave
-from enum import Enum
 from hashlib import sha256
 from io import BytesIO
 from typing import Dict
 from urllib.parse import urlparse
-import ssl
+
 import websockets

 from setting.models_provider.base_model_provider import MaxKBBaseModel
@@ -305,7 +305,8 @@ class VolcanicEngineSpeechToText(MaxKBBaseModel, BaseSpeechToText):
            res = await ws.recv()
            result = parse_response(res)
            if 'payload_msg' in result and result['payload_msg']['code'] != self.success_code:
-                raise Exception(f"Error code: {result['payload_msg']['code']}, message: {result['payload_msg']['message']}")
+                raise Exception(
+                    f"Error code: {result['payload_msg']['code']}, message: {result['payload_msg']['message']}")
            for seq, (chunk, last) in enumerate(VolcanicEngineSpeechToText.slice_data(wav_data, segment_size), 1):
                # if no compression, comment this line
                payload_bytes = gzip.compress(chunk)
--- a/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tti.py
+++ b/apps/setting/models_provider/impl/volcanic_engine_model_provider/model/tti.py
@@ -16,7 +16,6 @@ import sys
 from typing import Dict

 import requests
-from langchain_openai import ChatOpenAI

 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_tti import BaseTextToImage
--- a/apps/setting/models_provider/impl/wenxin_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/wenxin_model_provider/model/llm.py
@@ -6,17 +6,17 @@
    @date：2023/11/10 17:45
    @desc:
 """
-import uuid
 from typing import List, Dict, Optional, Any, Iterator

 from langchain_community.chat_models.baidu_qianfan_endpoint import _convert_dict_to_message, QianfanChatEndpoint
 from langchain_core.callbacks import CallbackManagerForLLMRun
-from langchain_core.outputs import ChatGenerationChunk
-from setting.models_provider.base_model_provider import MaxKBBaseModel
 from langchain_core.messages import (
    AIMessageChunk,
    BaseMessage,
 )
+from langchain_core.outputs import ChatGenerationChunk
+
+from setting.models_provider.base_model_provider import MaxKBBaseModel


 class QianfanChatModel(MaxKBBaseModel, QianfanChatEndpoint):
--- a/apps/setting/models_provider/impl/xf_model_provider/model/stt.py
+++ b/apps/setting/models_provider/impl/xf_model_provider/model/stt.py
@@ -10,10 +10,11 @@ import hmac
 import json
 import logging
 import os
+import ssl
 from datetime import datetime, UTC
 from typing import Dict
 from urllib.parse import urlencode, urlparse
-import ssl
+
 import websockets

 from setting.models_provider.base_model_provider import MaxKBBaseModel
@@ -29,6 +30,7 @@ ssl_context.verify_mode = ssl.CERT_NONE

 max_kb = logging.getLogger("max_kb")

+
 class XFSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
    spark_app_id: str
    spark_api_key: str
@@ -94,7 +96,7 @@ class XFSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
    def check_auth(self):
        cwd = os.path.dirname(os.path.abspath(__file__))
        with open(f'{cwd}/iat_mp3_16k.mp3', 'rb') as f:
-             self.speech_to_text(f)
+            self.speech_to_text(f)

    def speech_to_text(self, file):
        async def handle():
--- a/apps/setting/models_provider/impl/xf_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/xf_model_provider/model/tts.py
@@ -11,17 +11,17 @@ import hashlib
 import hmac
 import json
 import logging
-import os
+import ssl
 from datetime import datetime, UTC
 from typing import Dict
 from urllib.parse import urlencode, urlparse
-import ssl
+
 import websockets
+from django.utils.translation import gettext as __

 from common.util.common import _remove_empty_lines
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_tts import BaseTextToSpeech
-from django.utils.translation import gettext_lazy as _

 max_kb = logging.getLogger("max_kb")

@@ -98,7 +98,7 @@ class XFSparkTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
        return url

    def check_auth(self):
-        self.text_to_speech(_('Hello'))
+        self.text_to_speech(__('Hello'))

    def text_to_speech(self, text):

--- a/apps/setting/models_provider/impl/xinference_model_provider/model/embedding.py
+++ b/apps/setting/models_provider/impl/xinference_model_provider/model/embedding.py
@@ -2,7 +2,6 @@
 import threading
 from typing import Dict, Optional, List, Any

-from langchain_community.embeddings import XinferenceEmbeddings
 from langchain_core.embeddings import Embeddings

 from setting.models_provider.base_model_provider import MaxKBBaseModel
--- a/apps/setting/models_provider/impl/xinference_model_provider/model/llm.py
+++ b/apps/setting/models_provider/impl/xinference_model_provider/model/llm.py
@@ -1,12 +1,8 @@
 # coding=utf-8

-from typing import Dict, Optional, List, Any, Iterator
+from typing import Dict
 from urllib.parse import urlparse, ParseResult

-from langchain_core.language_models import LanguageModelInput
-from langchain_core.messages import BaseMessageChunk
-from langchain_core.runnables import RunnableConfig
-
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI

--- a/apps/setting/models_provider/impl/xinference_model_provider/model/stt.py
+++ b/apps/setting/models_provider/impl/xinference_model_provider/model/stt.py
@@ -1,4 +1,3 @@
-import asyncio
 import io
 from typing import Dict

@@ -56,4 +55,3 @@ class XInferenceSpeechToText(MaxKBBaseModel, BaseSpeechToText):
        buffer.name = "file.mp3"  # this is the important line
        res = client.audio.transcriptions.create(model=self.model, language="zh", file=buffer)
        return res.text
-
--- a/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py
+++ b/apps/setting/models_provider/impl/xinference_model_provider/model/tts.py
@@ -6,7 +6,7 @@ from common.config.tokenizer_manage_config import TokenizerManage
 from common.util.common import _remove_empty_lines
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_tts import BaseTextToSpeech
-from django.utils.translation import gettext_lazy as _
+from django.utils.translation import gettext as __


 def custom_get_token_ids(text: str):
@@ -41,7 +41,7 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
        )

    def check_auth(self):
-        self.text_to_speech(_('Hello'))
+        self.text_to_speech(__('Hello'))

    def text_to_speech(self, text):
        client = OpenAI(
@@ -58,4 +58,4 @@ class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
            return response.read()

    def is_cache_model(self):
-        return False
+        return False
--- a/apps/setting/models_provider/impl/zhipu_model_provider/model/tti.py
+++ b/apps/setting/models_provider/impl/zhipu_model_provider/model/tti.py
@@ -1,5 +1,6 @@
 from typing import Dict

+from django.utils.translation import gettext as __
 from langchain_community.chat_models import ChatZhipuAI
 from langchain_core.messages import HumanMessage
 from zhipuai import ZhipuAI
@@ -7,7 +8,6 @@ from zhipuai import ZhipuAI
 from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
 from setting.models_provider.impl.base_tti import BaseTextToImage
-from django.utils.translation import gettext_lazy as _


 def custom_get_token_ids(text: str):
@@ -46,7 +46,7 @@ class ZhiPuTextToImage(MaxKBBaseModel, BaseTextToImage):
            zhipuai_api_key=self.api_key,
            model_name=self.model,
        )
-        chat.invoke([HumanMessage([{"type": "text", "text": _('Hello')}])])
+        chat.invoke([HumanMessage([{"type": "text", "text": __('Hello')}])])

        # self.generate_image('生成一个小猫图片')