feat: add video message handling and improve video context processing

This commit is contained in:
wxg0103 2025-10-16 15:14:41 +08:00
parent 57b3aa1254
commit 5fdb6dc34b
8 changed files with 106 additions and 50 deletions

View File

@ -77,8 +77,6 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
image,
**kwargs) -> NodeResult:
# 处理不正确的参数
if image is None or not isinstance(image, list):
image = []
workspace_id = self.workflow_manage.get_body().get('workspace_id')
image_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
**model_params_setting)
@ -91,7 +89,7 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
message_list = self.generate_message_list(image_model, system, prompt,
self.get_history_message(history_chat_record, dialogue_number), image)
self.context['message_list'] = message_list
self.context['image_list'] = image
self.generate_context_image(image)
self.context['dialogue_type'] = dialogue_type
if stream:
r = image_model.stream(message_list)
@ -104,6 +102,12 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
'history_message': history_message, 'question': question.content}, {},
_write_context=write_context)
def generate_context_image(self, image):
    """Record the image input on the node context for later rendering.

    A single http(s) URL string is stored as a one-element list of
    ``{'url': ...}`` dicts; a non-empty list is stored as-is.  Empty or
    ``None`` input leaves ``context['image_list']`` untouched.
    """
    if isinstance(image, str) and image.startswith('http'):
        self.context['image_list'] = [{'url': image}]
        return
    if image is not None and len(image) > 0:
        self.context['image_list'] = image
def get_history_message_for_details(self, history_chat_record, dialogue_number):
start_index = len(history_chat_record) - dialogue_number
history_message = reduce(lambda x, y: [*x, *y], [
@ -164,28 +168,32 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
def generate_prompt_question(self, prompt):
    """Render the prompt template and wrap it as a human chat message."""
    rendered = self.workflow_manage.generate_prompt(prompt)
    return HumanMessage(rendered)
def generate_message_list(self, image_model, system: str, prompt: str, history_message, image):
if image is not None and len(image) > 0:
# 处理多张图片
images = []
def _process_images(self, image):
"""
处理图像数据转换为模型可识别的格式
"""
images = []
if isinstance(image, str) and image.startswith('http'):
images.append({'type': 'image_url', 'image_url': {'url': image}})
elif image is not None and len(image) > 0:
for img in image:
if isinstance(img, str) and img.startswith('http'):
images.append({'type': 'image_url', 'image_url': {'url': img}})
else:
file_id = img['file_id']
file = QuerySet(File).filter(id=file_id).first()
image_bytes = file.get_bytes()
base64_image = base64.b64encode(image_bytes).decode("utf-8")
image_format = what(None, image_bytes)
images.append(
{'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
messages = [HumanMessage(
content=[
{'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
*images
])]
file_id = img['file_id']
file = QuerySet(File).filter(id=file_id).first()
image_bytes = file.get_bytes()
base64_image = base64.b64encode(image_bytes).decode("utf-8")
image_format = what(None, image_bytes)
images.append(
{'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
return images
def generate_message_list(self, image_model, system: str, prompt: str, history_message, image):
prompt_text = self.workflow_manage.generate_prompt(prompt)
images = self._process_images(image)
if images:
messages = [HumanMessage(content=[{'type': 'text', 'text': prompt_text}, *images])]
else:
messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
messages = [HumanMessage(prompt_text)]
if system is not None and len(system) > 0:
return [

View File

@ -1,5 +1,6 @@
# coding=utf-8
import base64
import mimetypes
import time
from functools import reduce
from imghdr import what
@ -76,9 +77,6 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
chat_record_id,
video,
**kwargs) -> NodeResult:
# 处理不正确的参数
if video is None or not isinstance(video, list):
video = []
workspace_id = self.workflow_manage.get_body().get('workspace_id')
video_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
**model_params_setting)
@ -91,7 +89,7 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
message_list = self.generate_message_list(video_model, system, prompt,
self.get_history_message(history_chat_record, dialogue_number), video)
self.context['message_list'] = message_list
self.context['video_list'] = video
self.generate_context_video(video)
self.context['dialogue_type'] = dialogue_type
if stream:
r = video_model.stream(message_list)
@ -104,6 +102,12 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
'history_message': history_message, 'question': question.content}, {},
_write_context=write_context)
def generate_context_video(self, video):
    """Record the video input on the node context for later rendering.

    A single http(s) URL string is stored as a one-element list of
    ``{'url': ...}`` dicts; a non-empty list is stored as-is.  Empty or
    ``None`` input leaves ``context['video_list']`` untouched.
    """
    if isinstance(video, str) and video.startswith('http'):
        self.context['video_list'] = [{'url': video}]
        return
    if video is not None and len(video) > 0:
        self.context['video_list'] = video
def get_history_message_for_details(self, history_chat_record, dialogue_number):
start_index = len(history_chat_record) - dialogue_number
history_message = reduce(lambda x, y: [*x, *y], [
@ -164,28 +168,29 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
def generate_prompt_question(self, prompt):
    """Render the prompt template and wrap it as a human chat message."""
    rendered = self.workflow_manage.generate_prompt(prompt)
    return HumanMessage(rendered)
def _process_videos(self, image):
videos = []
if isinstance(image, str) and image.startswith('http'):
videos.append({'type': 'video_url', 'video_url': {'url': image}})
elif image is not None and len(image) > 0:
for img in image:
file_id = img['file_id']
file = QuerySet(File).filter(id=file_id).first()
video_bytes = file.get_bytes()
base64_video = base64.b64encode(video_bytes).decode("utf-8")
video_format = mimetypes.guess_type(file.file_name)[0] # 获取MIME类型
videos.append(
{'type': 'video_url', 'video_url': {'url': f'data:{video_format};base64,{base64_video}'}})
return videos
def generate_message_list(self, video_model, system: str, prompt: str, history_message, video):
if video is not None and len(video) > 0:
# 处理多张图片
videos = []
for img in video:
if isinstance(img, str) and img.startswith('http'):
videos.append({'type': 'video_url', 'video_url': {'url': img}})
else:
file_id = img['file_id']
file = QuerySet(File).filter(id=file_id).first()
video_bytes = file.get_bytes()
base64_video = base64.b64encode(video_bytes).decode("utf-8")
video_format = what(None, video_bytes)
videos.append(
{'type': 'video_url', 'video_url': {'url': f'data:video/{video_format};base64,{base64_video}'}})
messages = [HumanMessage(
content=[
{'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
*videos
])]
prompt_text = self.workflow_manage.generate_prompt(prompt)
videos = self._process_videos(video)
if videos:
messages = [HumanMessage(content=[{'type': 'text', 'text': prompt_text}, *videos])]
else:
messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
messages = [HumanMessage(prompt_text)]
if system is not None and len(system) > 0:
return [

View File

@ -807,6 +807,7 @@ const getQuestion = () => {
uploadImageList.value.length > 0,
uploadDocumentList.value.length > 0,
uploadAudioList.value.length > 0,
uploadVideoList.value.length > 0,
uploadOtherList.value.length > 0,
]
if (fileLength.filter((f) => f).length > 1) {
@ -818,6 +819,8 @@ const getQuestion = () => {
} else if (fileLength[2]) {
return t('chat.uploadFile.audioMessage')
} else if (fileLength[3]) {
return t('chat.uploadFile.videoMessage')
} else if (fileLength[4]) {
return t('chat.uploadFile.otherMessage')
}
}

View File

@ -102,6 +102,21 @@
</template>
</el-space>
</div>
<div v-if="data.video_list?.length > 0">
<p class="mb-8 color-secondary">{{ $t('common.fileUpload.image') }}:</p>
<el-space wrap>
<template v-for="(f, i) in data.video_list" :key="i">
<video
:src="f.url"
style="width: 170px; display: block"
controls
autoplay
class="border-r-6"
/>
</template>
</el-space>
</div>
<div v-if="data.other_list?.length > 0">
<p class="mb-8 color-secondary">{{ $t('common.fileUpload.document') }}:</p>
@ -581,8 +596,6 @@
<video
v-if="h.type === 'video_url'"
:src="h.video_url.url"
alt=""
fit="cover"
style="width: 40px; height: 40px; display: inline-block"
class="border-r-6 mr-8"
/>

View File

@ -76,6 +76,7 @@ export default {
imageMessage: 'Please process the image content',
documentMessage: 'Please understand the content of the document',
audioMessage: 'Please understand the audio content',
videoMessage: 'Please understand the video content',
otherMessage: 'Please understand the file content',
errorMessage: 'Upload Failed',
fileMessage: 'Please process the file content',

View File

@ -74,6 +74,7 @@ export default {
imageMessage: '请解析图片内容',
documentMessage: '请理解文档内容',
audioMessage: '请理解音频内容',
videoMessage: '请理解视频内容',
otherMessage: '请理解文件内容',
errorMessage: '上传失败',
fileMessage: '请解析文件内容',

View File

@ -74,6 +74,7 @@ export default {
imageMessage: '請解析圖片內容',
documentMessage: '請理解檔案內容',
audioMessage: '請理解音訊內容',
videoMessage: '請理解視頻內容',
otherMessage: '請理解檔案內容',
fileMessage: '請解析文件內容',
errorMessage: '上傳失敗',

View File

@ -92,6 +92,28 @@
v-model="form_data.audio_list"
/>
</el-form-item>
<el-form-item
v-if="form_data.hasOwnProperty('video_list') || 'video_list' in form_data"
:label="$t('views.applicationWorkflow.nodes.videoUnderstandNode.video.label')"
prop="video_list"
:rules="{
message: $t(
'views.applicationWorkflow.nodes.videoUnderstandNode.video.requiredMessage',
),
trigger: 'blur',
required: false,
}"
>
<NodeCascader
ref="nodeCascaderRef"
:nodeModel="nodeModel"
class="w-full"
:placeholder="
$t('views.applicationWorkflow.nodes.videoUnderstandNode.video.requiredMessage')
"
v-model="form_data.video_list"
/>
</el-form-item>
<div v-for="(field, index) in form_data.api_input_field_list" :key="'api-input-' + index">
<el-form-item
:label="field.variable"
@ -191,6 +213,7 @@ const form = {
document_list: ['start-node', 'document'],
image_list: ['start-node', 'image'],
audio_list: ['start-node', 'audio'],
video_list: ['start-node', 'video'],
}
const applicationNodeFormRef = ref<FormInstance>()
@ -294,8 +317,9 @@ const update_field = () => {
handleFileUpload('document', fileUploadSetting.document)
handleFileUpload('image', fileUploadSetting.image)
handleFileUpload('audio', fileUploadSetting.audio)
handleFileUpload('video', fileUploadSetting.video)
} else {
;['document_list', 'image_list', 'audio_list'].forEach((list) => {
;['document_list', 'image_list', 'audio_list', 'video_list'].forEach((list) => {
// eslint-disable-next-line vue/no-mutating-props
delete props.nodeModel.properties.node_data[list]
})