Pass the video model through the history-message helpers so that stored
videos are handed to video_model.upload_file_and_get_url() and referenced by
the returned URL instead of being inlined as base64 data URIs.

Review notes:
  * get_history_message builds one [human, ai] pair per chat record and
    flattens the pairs with reduce(); video_model is an argument of
    generate_history_human_message only and must not be a member of the pair.
  * Indentation and blank context lines were restored from a
    whitespace-collapsed copy of this patch; verify them against the target
    file before applying.
  * The post-image blob id on the "index" line predates the
    get_history_message correction.

diff --git a/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py b/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py
index dabc8ff8f..a2916cbb1 100644
--- a/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py
+++ b/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py
@@ -59,11 +59,11 @@ def write_context(node_variable: Dict, workflow_variable: Dict, node: INode, wor
     _write_context(node_variable, workflow_variable, node, workflow, answer)
 
 
-def file_id_to_base64(file_id: str):
+def file_id_to_base64(file_id: str, video_model):
     file = QuerySet(File).filter(id=file_id).first()
     file_bytes = file.get_bytes()
-    base64_video = base64.b64encode(file_bytes).decode("utf-8")
-    return [base64_video, get_video_format(file.file_name)]
+    url = video_model.upload_file_and_get_url(file_bytes, file.file_name)
+    return url
 
 
 class BaseVideoUnderstandNode(IVideoUnderstandNode):
@@ -88,7 +88,8 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
         self.context['question'] = question.content
         # 生成消息列表, 真实的history_message
         message_list = self.generate_message_list(video_model, system, prompt,
-                                                  self.get_history_message(history_chat_record, dialogue_number), video)
+                                                  self.get_history_message(history_chat_record, dialogue_number,
+                                                                           video_model), video)
         self.context['message_list'] = message_list
         self.generate_context_video(video)
         self.context['dialogue_type'] = dialogue_type
@@ -140,28 +141,28 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode):
                 ])
         return HumanMessage(content=chat_record.problem_text)
 
-    def get_history_message(self, history_chat_record, dialogue_number):
+    def get_history_message(self, history_chat_record, dialogue_number, video_model):
         start_index = len(history_chat_record) - dialogue_number
         history_message = reduce(lambda x, y: [*x, *y], [
-            [self.generate_history_human_message(history_chat_record[index]),
-             self.generate_history_ai_message(history_chat_record[index])]
+            [self.generate_history_human_message(history_chat_record[index], video_model),
+             self.generate_history_ai_message(history_chat_record[index])]
             for index in
             range(start_index if start_index > 0 else 0, len(history_chat_record))], [])
         return history_message
 
-    def generate_history_human_message(self, chat_record):
+    def generate_history_human_message(self, chat_record, video_model):
 
         for data in chat_record.details.values():
             if self.node.id == data['node_id'] and 'video_list' in data:
                 video_list = data['video_list']
                 if len(video_list) == 0 or data['dialogue_type'] == 'WORKFLOW':
                     return HumanMessage(content=chat_record.problem_text)
-                video_base64_list = [file_id_to_base64(video.get('file_id')) for video in video_list]
+                video_base64_list = [file_id_to_base64(video.get('file_id'), video_model) for video in video_list]
                 return HumanMessage(
                     content=[
                         {'type': 'text', 'text': data['question']},
                         *[{'type': 'video_url',
-                           'video_url': {'url': f'data:{base64_video[1]};base64,{base64_video[0]}'}} for
+                           'video_url': {'url': f'{base64_video}'}} for
                           base64_video in video_base64_list]
                     ])
         return HumanMessage(content=chat_record.problem_text)