From 9d4b2bf0102dd8496821e17e46f1e43869a05ab7 Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Tue, 18 Nov 2025 18:27:11 +0800 Subject: [PATCH] feat: enhance image and video handling by supporting URLs and file IDs --- .../impl/base_image_to_video_node.py | 2 +- .../impl/base_image_understand_node.py | 44 ++++++++++++++----- .../impl/base_video_understand_node.py | 33 ++++++++++---- apps/oss/retrieval_urls.py | 2 + apps/oss/urls.py | 1 + apps/oss/views/file.py | 33 ++++++++++++++ 6 files changed, 94 insertions(+), 21 deletions(-) diff --git a/apps/application/flow/step_node/image_to_video_step_node/impl/base_image_to_video_node.py b/apps/application/flow/step_node/image_to_video_step_node/impl/base_image_to_video_node.py index ceba18eee..aa146cea2 100644 --- a/apps/application/flow/step_node/image_to_video_step_node/impl/base_image_to_video_node.py +++ b/apps/application/flow/step_node/image_to_video_step_node/impl/base_image_to_video_node.py @@ -74,7 +74,7 @@ class BaseImageToVideoNode(IImageToVideoNode): def get_file_base64(self, image_url): try: if isinstance(image_url, list): - image_url = image_url[0].get('file_id') + image_url = image_url[0].get('file_id') if 'file_id' in image_url[0] else image_url[0].get('url') if isinstance(image_url, str) and not image_url.startswith('http'): file = QuerySet(File).filter(id=image_url).first() file_bytes = file.get_bytes() diff --git a/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py b/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py index 7f57b4340..f93e670c1 100644 --- a/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py +++ b/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py @@ -131,11 +131,18 @@ class BaseImageUnderstandNode(IImageUnderstandNode): image_list = data['image_list'] if len(image_list) == 0 or data['dialogue_type'] == 'WORKFLOW': return HumanMessage(content=chat_record.problem_text) - file_id_list = [image.get('file_id') for image in image_list] + + file_id_list = [] + url_list = [] + for image in image_list: + if 'file_id' in image: + file_id_list.append(image.get('file_id')) + elif 'url' in image: + url_list.append(image.get('url')) return HumanMessage(content=[ {'type': 'text', 'text': data['question']}, - *[{'type': 'image_url', 'image_url': {'url': f'./oss/file/{file_id}'}} for file_id in file_id_list] - + *[{'type': 'image_url', 'image_url': {'url': f'./oss/file/{file_id}'}} for file_id in file_id_list], + *[{'type': 'image_url', 'image_url': {'url': url}} for url in url_list] ]) return HumanMessage(content=chat_record.problem_text) @@ -155,13 +162,22 @@ class BaseImageUnderstandNode(IImageUnderstandNode): image_list = data['image_list'] if len(image_list) == 0 or data['dialogue_type'] == 'WORKFLOW': return HumanMessage(content=chat_record.problem_text) - image_base64_list = [file_id_to_base64(image.get('file_id')) for image in image_list] + file_id_list = [] + url_list = [] + for image in image_list: + if 'file_id' in image: + file_id_list.append(image.get('file_id')) + elif 'url' in image: + url_list.append(image.get('url')) + image_base64_list = [file_id_to_base64(file_id) for file_id in file_id_list] + return HumanMessage( content=[ {'type': 'text', 'text': data['question']}, *[{'type': 'image_url', 'image_url': {'url': f'data:image/{base64_image[1]};base64,{base64_image[0]}'}} for - base64_image in image_base64_list] + base64_image in image_base64_list], + *[{'type': 'image_url', 'image_url': url} for url in url_list] ]) return HumanMessage(content=chat_record.problem_text) @@ -177,13 +193,17 @@ class BaseImageUnderstandNode(IImageUnderstandNode): images.append({'type': 'image_url', 'image_url': {'url': image}}) elif image is not None and len(image) > 0: for img in image: - file_id = img['file_id'] - file = QuerySet(File).filter(id=file_id).first() - image_bytes = file.get_bytes() - base64_image = base64.b64encode(image_bytes).decode("utf-8") - image_format = what(None, image_bytes) - images.append( - {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}}) + if 'file_id' in img: + file_id = img['file_id'] + file = QuerySet(File).filter(id=file_id).first() + image_bytes = file.get_bytes() + base64_image = base64.b64encode(image_bytes).decode("utf-8") + image_format = what(None, image_bytes) + images.append( + {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}}) + elif 'url' in img and img['url'].startswith('http'): + images.append( + {'type': 'image_url', 'image_url': {'url': img["url"]}}) return images def generate_message_list(self, image_model, system: str, prompt: str, history_message, image): diff --git a/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py b/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py index 67ca26170..9a478e6c9 100644 --- a/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py +++ b/apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py @@ -131,11 +131,17 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode): # 增加对 None 和空列表的检查 if not video_list or len(video_list) == 0 or data['dialogue_type'] == 'WORKFLOW': return HumanMessage(content=chat_record.problem_text) - file_id_list = [video.get('file_id') for video in video_list] + file_id_list = [] + url_list = [] + for image in video_list: + if 'file_id' in image: + file_id_list.append(image.get('file_id')) + elif 'url' in image: + url_list.append(image.get('url')) return HumanMessage(content=[ {'type': 'text', 'text': data['question']}, - *[{'type': 'video_url', 'video_url': {'url': f'./oss/file/{file_id}'}} for file_id in file_id_list] - + *[{'type': 'video_url', 'video_url': {'url': f'./oss/file/{file_id}'}} for file_id in file_id_list], + *[{'type': 'video_url', 'video_url': {'url': url}} for url in url_list], ]) return HumanMessage(content=chat_record.problem_text) @@ -155,6 +161,13 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode): video_list = data['video_list'] if len(video_list) == 0 or data['dialogue_type'] == 'WORKFLOW': return HumanMessage(content=chat_record.problem_text) + file_id_list = [] + url_list = [] + for image in video_list: + if 'file_id' in image: + file_id_list.append(image.get('file_id')) + elif 'url' in image: + url_list.append(image.get('url')) video_base64_list = [file_id_to_base64(video.get('file_id'), video_model) for video in video_list] return HumanMessage( content=[ @@ -174,11 +187,15 @@ class BaseVideoUnderstandNode(IVideoUnderstandNode): videos.append({'type': 'video_url', 'video_url': {'url': image}}) elif image is not None and len(image) > 0: for img in image: - file_id = img['file_id'] - file = QuerySet(File).filter(id=file_id).first() - url = video_model.upload_file_and_get_url(file.get_bytes(), file.file_name) - videos.append( - {'type': 'video_url', 'video_url': {'url': url}}) + if 'file_id' in img: + file_id = img['file_id'] + file = QuerySet(File).filter(id=file_id).first() + url = video_model.upload_file_and_get_url(file.get_bytes(), file.file_name) + videos.append( + {'type': 'video_url', 'video_url': {'url': url}}) + elif 'url' in img and img['url'].startswith('http'): + videos.append( + {'type': 'video_url', 'video_url': {'url': img['url']}}) return videos def generate_message_list(self, video_model, system: str, prompt: str, history_message, video): diff --git a/apps/oss/retrieval_urls.py b/apps/oss/retrieval_urls.py index 77687a646..816c242ee 100644 --- a/apps/oss/retrieval_urls.py +++ b/apps/oss/retrieval_urls.py @@ -17,5 +17,7 @@ urlpatterns = [ views.FileRetrievalView.as_view()), re_path(rf'oss/file/(?P[\w-]+)/?$', views.FileRetrievalView.as_view()), + re_path(rf'^/oss/get_url/(?P[\w-]+)?$', + views.GetUrlView.as_view()), ] diff --git a/apps/oss/urls.py b/apps/oss/urls.py index f344049f1..3537b026b 100644 --- a/apps/oss/urls.py +++ b/apps/oss/urls.py @@ -6,4 +6,5 @@ app_name = 'oss' urlpatterns = [ path('oss/file', views.FileView.as_view()), + path('oss/get_url', views.GetUrlView.as_view()), ] diff --git a/apps/oss/views/file.py b/apps/oss/views/file.py index a34dbca28..b9f6ca67d 100644 --- a/apps/oss/views/file.py +++ b/apps/oss/views/file.py @@ -1,4 +1,7 @@ # coding=utf-8 +import base64 + +import requests from django.utils.translation import gettext_lazy as _ from drf_spectacular.utils import extend_schema from rest_framework.parsers import MultiPartParser @@ -66,3 +69,33 @@ class FileView(APIView): @log(menu='file', operate='Delete file') def delete(self, request: Request, file_id: str): return result.success(FileSerializer.Operate(data={'id': file_id}).delete()) + + +class GetUrlView(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + methods=['GET'], + summary=_('Get url'), + description=_('Get url'), + operation_id=_('Get url'), # type: ignore + tags=[_('Chat')] # type: ignore + ) + def get(self, request: Request): + url = request.query_params.get('url') + response = requests.get(url) + # 返回状态码 响应内容大小 响应的contenttype 还有字节流 + content_type = response.headers.get('Content-Type', '') + # 根据内容类型决定如何处理 + if 'text' in content_type or 'json' in content_type: + content = response.text + else: + # 二进制内容使用Base64编码 + content = base64.b64encode(response.content).decode('utf-8') + + return result.success({ + 'status_code': response.status_code, + 'Content-Length': response.headers.get('Content-Length', 0), + 'Content-Type': content_type, + 'content': content, + })