From 3670ec27de93ef5d443d7f91a951f7752c78bec5 Mon Sep 17 00:00:00 2001
From: wxg0103 <727495428@qq.com>
Date: Mon, 24 Nov 2025 10:33:17 +0800
Subject: [PATCH] feat: enhance image generation by supporting multiple model types

---
 .../credential/tti.py |  4 +-
 .../model/tti.py      | 61 ++++++++++++++-----
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py
index 82f1d7185..7825501e4 100644
--- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py
+++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py
@@ -9,6 +9,7 @@
 from common.forms import BaseForm, PasswordInputField, SingleSelect, SliderField
 from models_provider.base_model_provider import BaseModelCredential, ValidCode
 from common.utils.logger import maxkb_logger
+
 
 class QwenModelParams(BaseForm):
     """
     Parameters class for the Qwen Text-to-Image model.
     """
@@ -26,7 +27,8 @@ class QwenModelParams(BaseForm):
             {'value': '1280*720', 'label': '1280*720'},
         ],
         text_field='label',
-        value_field='value'
+        value_field='value',
+        attrs={'allow-create': True, 'filterable': True}
     )
 
     n = SliderField(
diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py
index 6ff912482..2ca3696af 100644
--- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py
+++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py
@@ -2,7 +2,7 @@
 from http import HTTPStatus
 from typing import Dict
 
-from dashscope import ImageSynthesis
+from dashscope import ImageSynthesis, MultiModalConversation
 from django.utils.translation import gettext
 from langchain_community.chat_models import ChatTongyi
 from langchain_core.messages import HumanMessage
@@ -46,17 +46,48 @@ class QwenTextToImageModel(MaxKBBaseModel, BaseTextToImage):
         chat.invoke([HumanMessage([{"type": "text", "text": gettext('Hello')}])])
 
     def generate_image(self, prompt: str, negative_prompt: str = None):
-        rsp = ImageSynthesis.call(api_key=self.api_key,
-                                  model=self.model_name,
-                                  base_url='https://dashscope.aliyuncs.com/compatible-mode/v1',
-                                  prompt=prompt,
-                                  negative_prompt=negative_prompt,
-                                  **self.params)
-        file_urls = []
-        if rsp.status_code == HTTPStatus.OK:
-            for result in rsp.output.results:
-                file_urls.append(result.url)
-        else:
-            maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' %
-                               (rsp.status_code, rsp.code, rsp.message))
-        return file_urls
+        if self.model_name.startswith("wan"):
+            rsp = ImageSynthesis.call(api_key=self.api_key,
+                                      model=self.model_name,
+                                      base_url='https://dashscope.aliyuncs.com/compatible-mode/v1',
+                                      prompt=prompt,
+                                      negative_prompt=negative_prompt,
+                                      **self.params)
+            file_urls = []
+            if rsp.status_code == HTTPStatus.OK:
+                for result in rsp.output.results:
+                    file_urls.append(result.url)
+            else:
+                maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' %
+                                   (rsp.status_code, rsp.code, rsp.message))
+            return file_urls
+        elif self.model_name.startswith("qwen"):
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        }
+                    ]
+                }
+            ]
+            rsp = MultiModalConversation.call(
+                api_key=self.api_key,
+                model=self.model_name,
+                messages=messages,
+                result_format='message',
+                base_url='https://dashscope.aliyuncs.com/v1',
+                stream=False,
+                negative_prompt=negative_prompt,
+                **self.params
+            )
+            file_urls = []
+            if rsp.status_code == HTTPStatus.OK:
+                for result in rsp.output.choices:
+                    file_urls.append(result.message.content[0].get('image'))
+            else:
+                maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' %
+                                   (rsp.status_code, rsp.code, rsp.message))
+            return file_urls
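
Note (not part of the patch): the sketch below reproduces, standalone, the model-name
prefix dispatch this commit adds to generate_image(): "wan*" models go through the
DashScope ImageSynthesis endpoint, "qwen*" models through MultiModalConversation.
The API key and model names are placeholders, and the response handling assumes the
same SDK return shapes the patch relies on (output.results[].url for ImageSynthesis,
output.choices[].message.content[0]['image'] for MultiModalConversation).

    # Standalone illustration of the wan/qwen dispatch; values are placeholders.
    from http import HTTPStatus

    from dashscope import ImageSynthesis, MultiModalConversation


    def generate_image_urls(api_key: str, model_name: str, prompt: str) -> list:
        # "wan*" models use the image-synthesis endpoint, as in the patch.
        if model_name.startswith("wan"):
            rsp = ImageSynthesis.call(api_key=api_key, model=model_name, prompt=prompt)
            if rsp.status_code == HTTPStatus.OK:
                return [result.url for result in rsp.output.results]
        # "qwen*" models use the multimodal conversation endpoint with a chat-style message.
        elif model_name.startswith("qwen"):
            messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
            rsp = MultiModalConversation.call(api_key=api_key, model=model_name,
                                              messages=messages, result_format='message')
            if rsp.status_code == HTTPStatus.OK:
                return [choice.message.content[0].get('image') for choice in rsp.output.choices]
        return []

    # Example usage (placeholder key and model name):
    # urls = generate_image_urls("sk-xxx", "qwen-image", "a watercolor fox")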