diff --git a/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py b/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py
index 732057048..7f57b4340 100644
--- a/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py
+++ b/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py
@@ -77,8 +77,6 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
                 image,
                 **kwargs) -> NodeResult:
         # Handle invalid parameters
-        if image is None or not isinstance(image, list):
-            image = []
         workspace_id = self.workflow_manage.get_body().get('workspace_id')
         image_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
                                                                **model_params_setting)
@@ -91,7 +89,7 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
         message_list = self.generate_message_list(image_model, system, prompt,
                                                   self.get_history_message(history_chat_record, dialogue_number), image)
         self.context['message_list'] = message_list
-        self.context['image_list'] = image
+        self.generate_context_image(image)
         self.context['dialogue_type'] = dialogue_type
         if stream:
             r = image_model.stream(message_list)
@@ -104,6 +102,12 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
                            'history_message': history_message, 'question': question.content}, {},
                           _write_context=write_context)
 
+    def generate_context_image(self, image):
+        if isinstance(image, str) and image.startswith('http'):
+            self.context['image_list'] = [{'url': image}]
+        elif image is not None and len(image) > 0:
+            self.context['image_list'] = image
+
     def get_history_message_for_details(self, history_chat_record, dialogue_number):
         start_index = len(history_chat_record) - dialogue_number
         history_message = reduce(lambda x, y: [*x, *y], [
@@ -164,28 +168,32 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
     def generate_prompt_question(self, prompt):
         return HumanMessage(self.workflow_manage.generate_prompt(prompt))
 
-    def generate_message_list(self, image_model, system: str, prompt: str, history_message, image):
-        if image is not None and len(image) > 0:
-            # Handle multiple images
-            images = []
+    def _process_images(self, image):
+        """
+        Process image data into the format the model expects
+        """
+        images = []
+        if isinstance(image, str) and image.startswith('http'):
+            images.append({'type': 'image_url', 'image_url': {'url': image}})
+        elif image is not None and len(image) > 0:
             for img in image:
-                if isinstance(img, str) and img.startswith('http'):
-                    images.append({'type': 'image_url', 'image_url': {'url': img}})
-                else:
-                    file_id = img['file_id']
-                    file = QuerySet(File).filter(id=file_id).first()
-                    image_bytes = file.get_bytes()
-                    base64_image = base64.b64encode(image_bytes).decode("utf-8")
-                    image_format = what(None, image_bytes)
-                    images.append(
-                        {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
-            messages = [HumanMessage(
-                content=[
-                    {'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
-                    *images
-                ])]
+                file_id = img['file_id']
+                file = QuerySet(File).filter(id=file_id).first()
+                image_bytes = file.get_bytes()
+                base64_image = base64.b64encode(image_bytes).decode("utf-8")
+                image_format = what(None, image_bytes)
+                images.append(
+                    {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
+        return images
+
+    def generate_message_list(self, image_model, system: str, prompt: str, history_message, image):
+        prompt_text = self.workflow_manage.generate_prompt(prompt)
+        images = self._process_images(image)
+
+        if images:
+            messages = [HumanMessage(content=[{'type': 'text', 'text': prompt_text}, *images])]
         else:
-            messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
+            messages = [HumanMessage(prompt_text)]
 
         if system is not None and len(system) > 0:
             return [
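The refactor above moves input normalization into `_process_images`: a bare http(s) URL string becomes a single `image_url` content part, while a list of file dicts is fetched from storage and inlined as base64 data URLs. (Note that `imghdr`, the source of `what`, is deprecated since Python 3.11 and removed in 3.13.) A minimal standalone sketch of that branching, with the Django-bound `QuerySet(File)` lookup replaced by a caller-supplied `load_file_bytes`; that name is illustrative, not part of the patch:

    # Standalone sketch of the _process_images branching; load_file_bytes is a
    # stand-in for the Django QuerySet(File) lookup and is not part of the patch.
    import base64
    from imghdr import what  # deprecated; removed in Python 3.13

    def process_images_sketch(image, load_file_bytes):
        images = []
        if isinstance(image, str) and image.startswith('http'):
            # A bare URL is passed through as a single image_url part.
            images.append({'type': 'image_url', 'image_url': {'url': image}})
        elif image:
            # A list of file dicts is resolved to bytes and inlined as data URLs.
            for img in image:
                image_bytes = load_file_bytes(img['file_id'])
                b64 = base64.b64encode(image_bytes).decode('utf-8')
                fmt = what(None, image_bytes)  # e.g. 'png' or 'jpeg'
                images.append({'type': 'image_url',
                               'image_url': {'url': f'data:image/{fmt};base64,{b64}'}})
        return images

    # A URL yields one part; None (or []) yields [], so generate_message_list
    # falls back to a plain text-only HumanMessage.
    assert process_images_sketch('http://example.com/a.png', lambda _id: b'') == [
        {'type': 'image_url', 'image_url': {'url': 'http://example.com/a.png'}}]
    assert process_images_sketch(None, lambda _id: b'') == []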
diff --git a/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py b/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py
index 48d764212..644e9ea3e 100644
--- a/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py
+++ b/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py
@@ -1,5 +1,6 @@
 # coding=utf-8
 import base64
+import mimetypes
 import time
 from functools import reduce
 from imghdr import what
@@ -76,9 +77,6 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
                 chat_record_id,
                 video,
                 **kwargs) -> NodeResult:
-        # Handle invalid parameters
-        if video is None or not isinstance(video, list):
-            video = []
         workspace_id = self.workflow_manage.get_body().get('workspace_id')
         video_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
                                                                **model_params_setting)
@@ -91,7 +89,7 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
         message_list = self.generate_message_list(video_model, system, prompt,
                                                   self.get_history_message(history_chat_record, dialogue_number), video)
         self.context['message_list'] = message_list
-        self.context['video_list'] = video
+        self.generate_context_video(video)
         self.context['dialogue_type'] = dialogue_type
         if stream:
             r = video_model.stream(message_list)
@@ -104,6 +102,12 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
                            'history_message': history_message, 'question': question.content}, {},
                           _write_context=write_context)
 
+    def generate_context_video(self, video):
+        if isinstance(video, str) and video.startswith('http'):
+            self.context['video_list'] = [{'url': video}]
+        elif video is not None and len(video) > 0:
+            self.context['video_list'] = video
+
     def get_history_message_for_details(self, history_chat_record, dialogue_number):
         start_index = len(history_chat_record) - dialogue_number
         history_message = reduce(lambda x, y: [*x, *y], [
@@ -164,28 +168,29 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
     def generate_prompt_question(self, prompt):
         return HumanMessage(self.workflow_manage.generate_prompt(prompt))
 
+    def _process_videos(self, image):
+        videos = []
+        if isinstance(image, str) and image.startswith('http'):
+            videos.append({'type': 'video_url', 'video_url': {'url': image}})
+        elif image is not None and len(image) > 0:
+            for img in image:
+                file_id = img['file_id']
+                file = QuerySet(File).filter(id=file_id).first()
+                video_bytes = file.get_bytes()
+                base64_video = base64.b64encode(video_bytes).decode("utf-8")
+                video_format = mimetypes.guess_type(file.file_name)[0]  # Get the MIME type
+                videos.append(
+                    {'type': 'video_url', 'video_url': {'url': f'data:{video_format};base64,{base64_video}'}})
+        return videos
+
     def generate_message_list(self, video_model, system: str, prompt: str, history_message, video):
-        if video is not None and len(video) > 0:
-            # Handle multiple images
-            videos = []
-            for img in video:
-                if isinstance(img, str) and img.startswith('http'):
-                    videos.append({'type': 'video_url', 'video_url': {'url': img}})
-                else:
-                    file_id = img['file_id']
-                    file = QuerySet(File).filter(id=file_id).first()
-                    video_bytes = file.get_bytes()
-                    base64_video = base64.b64encode(video_bytes).decode("utf-8")
-                    video_format = what(None, video_bytes)
-                    videos.append(
-                        {'type': 'video_url', 'video_url': {'url': f'data:video/{video_format};base64,{base64_video}'}})
-            messages = [HumanMessage(
-                content=[
-                    {'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
-                    *videos
-                ])]
+        prompt_text = self.workflow_manage.generate_prompt(prompt)
+        videos = self._process_videos(video)
+
+        if videos:
+            messages = [HumanMessage(content=[{'type': 'text', 'text': prompt_text}, *videos])]
         else:
-            messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
+            messages = [HumanMessage(prompt_text)]
 
         if system is not None and len(system) > 0:
             return [
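The video variant swaps content sniffing for `mimetypes.guess_type(file.file_name)`, which is necessary because `imghdr.what` only recognizes image formats. But `guess_type` keys off the file name alone (it never inspects the bytes) and returns `(None, None)` for unknown or missing extensions, which would yield a malformed `data:None;base64,...` URL. (Two cosmetic leftovers: `_process_videos` keeps the `image`/`img` parameter names copied from the image node, and the now-unused `from imghdr import what` import stays behind.) A small sketch of a guard the node could add; the `video/mp4` fallback is an assumption, not something the patch specifies:

    # Sketch of the new MIME lookup with a defensive fallback. guess_type
    # returns a (type, encoding) tuple and (None, None) for unknown extensions;
    # the 'video/mp4' default here is an assumed fallback, not part of the patch.
    import mimetypes

    def video_mime_type(file_name: str) -> str:
        mime_type = mimetypes.guess_type(file_name)[0]
        return mime_type if mime_type is not None else 'video/mp4'

    assert video_mime_type('clip.mp4') == 'video/mp4'
    # No extension: guess_type returns (None, None), so fall back.
    assert video_mime_type('clip') == 'video/mp4'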
diff --git a/ui/src/components/ai-chat/component/chat-input-operate/index.vue b/ui/src/components/ai-chat/component/chat-input-operate/index.vue
index 54e6baa44..1d8cce0ad 100644
--- a/ui/src/components/ai-chat/component/chat-input-operate/index.vue
+++ b/ui/src/components/ai-chat/component/chat-input-operate/index.vue
@@ -807,6 +807,7 @@ const getQuestion = () => {
     uploadImageList.value.length > 0,
     uploadDocumentList.value.length > 0,
     uploadAudioList.value.length > 0,
+    uploadVideoList.value.length > 0,
     uploadOtherList.value.length > 0,
   ]
   if (fileLength.filter((f) => f).length > 1) {
@@ -818,6 +819,8 @@ const getQuestion = () => {
   } else if (fileLength[2]) {
     return t('chat.uploadFile.audioMessage')
   } else if (fileLength[3]) {
+    return t('chat.uploadFile.videoMessage')
+  } else if (fileLength[4]) {
     return t('chat.uploadFile.otherMessage')
   }
 }
diff --git a/ui/src/components/ai-chat/component/knowledge-source-component/ExecutionDetailCard.vue b/ui/src/components/ai-chat/component/knowledge-source-component/ExecutionDetailCard.vue
index 1e0329233..08bdd60c9 100644
--- a/ui/src/components/ai-chat/component/knowledge-source-component/ExecutionDetailCard.vue
+++ b/ui/src/components/ai-chat/component/knowledge-source-component/ExecutionDetailCard.vue
@@ -102,6 +102,21 @@
 [hunk body lost in extraction: the template markup was stripped, leaving only the
  "{{ $t('common.fileUpload.image') }}:" and "{{ $t('common.fileUpload.document') }}:"
  labels; per the hunk header, 15 lines are added here, presumably the new
  video-file display block alongside the existing image and document sections]
@@ -581,8 +596,6 @@
 [hunk body lost in extraction; per the header it removes two lines]
diff --git a/ui/src/locales/lang/en-US/ai-chat.ts b/ui/src/locales/lang/en-US/ai-chat.ts
index 0fa4b007b..ceec6b510 100644
--- a/ui/src/locales/lang/en-US/ai-chat.ts
+++ b/ui/src/locales/lang/en-US/ai-chat.ts
@@ -76,6 +76,7 @@ export default {
     imageMessage: 'Please process the image content',
     documentMessage: 'Please understand the content of the document',
     audioMessage: 'Please understand the audio content',
+    videoMessage: 'Please understand the video content',
     otherMessage: 'Please understand the file content',
     errorMessage: 'Upload Failed',
     fileMessage: 'Please process the file content',
diff --git a/ui/src/locales/lang/zh-CN/ai-chat.ts b/ui/src/locales/lang/zh-CN/ai-chat.ts
index 81494bf0c..fddd75386 100644
--- a/ui/src/locales/lang/zh-CN/ai-chat.ts
+++ b/ui/src/locales/lang/zh-CN/ai-chat.ts
@@ -74,6 +74,7 @@ export default {
     imageMessage: '请解析图片内容',
     documentMessage: '请理解文档内容',
     audioMessage: '请理解音频内容',
+    videoMessage: '请理解视频内容',
     otherMessage: '请理解文件内容',
     errorMessage: '上传失败',
     fileMessage: '请解析文件内容',
diff --git a/ui/src/locales/lang/zh-Hant/ai-chat.ts b/ui/src/locales/lang/zh-Hant/ai-chat.ts
index 830a545be..dfe0deee0 100644
--- a/ui/src/locales/lang/zh-Hant/ai-chat.ts
+++ b/ui/src/locales/lang/zh-Hant/ai-chat.ts
@@ -74,6 +74,7 @@ export default {
     imageMessage: '請解析圖片內容',
     documentMessage: '請理解檔案內容',
     audioMessage: '請理解音訊內容',
+    videoMessage: '請理解視頻內容',
     otherMessage: '請理解檔案內容',
     fileMessage: '請解析文件內容',
     errorMessage: '上傳失敗',
diff --git a/ui/src/workflow/nodes/application-node/index.vue b/ui/src/workflow/nodes/application-node/index.vue
index dff926947..698fd2fda 100644
--- a/ui/src/workflow/nodes/application-node/index.vue
+++ b/ui/src/workflow/nodes/application-node/index.vue
@@ -92,6 +92,28 @@
             v-model="form_data.audio_list"
           />
+
 [remainder of hunk truncated; per the header it adds 22 lines, presumably the
  video upload controls following the audio_list field]
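Stepping back from the UI wiring, the shape both understand nodes now assemble is the same: one text part built from the rendered prompt, followed by the media parts from `_process_images` / `_process_videos`. Sketched with plain dicts instead of the project's `HumanMessage` so it runs without langchain; the URL is a placeholder:

    # Multimodal content-part layout the nodes build; plain dicts stand in for
    # langchain's HumanMessage(content=[...]) and the URL is a placeholder.
    prompt_text = 'Please understand the video content'
    videos = [{'type': 'video_url', 'video_url': {'url': 'http://example.com/clip.mp4'}}]
    content = [{'type': 'text', 'text': prompt_text}, *videos]
    assert content[0]['type'] == 'text' and content[1]['type'] == 'video_url'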