From 9c42965543221c35b263b9d5fcc18010bb771730 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Mon, 24 Nov 2025 10:51:57 +0800 Subject: [PATCH] refactor: streamline image handling by updating application and knowledge ID management --- .../impl/base_document_extract_node.py | 15 +++++++++------ .../impl/base_document_split_node.py | 15 +-------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py index ba21dbc8f..739dc10e7 100644 --- a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py +++ b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py @@ -51,19 +51,21 @@ class BaseDocumentExtractNode(IDocumentExtractNode): return NodeResult({'content': '', 'document_list': []}, {}) # 安全获取 application - application = None + application_id = None if (self.workflow_manage and self.workflow_manage.work_flow_post_handler and self.workflow_manage.work_flow_post_handler.chat_info): - application = self.workflow_manage.work_flow_post_handler.chat_info.application + application_id = self.workflow_manage.work_flow_post_handler.chat_info.application.id + knowledge_id = self.workflow_params.get('knowledge_id') # doc文件中的图片保存 def save_image(image_list): for image in image_list: meta = { - 'debug': False if (application and application.id) else True, + 'debug': False if (application_id or knowledge_id) else True, 'chat_id': chat_id, - 'application_id': str(application.id) if (application and application.id) else None, + 'application_id': str(application_id) if application_id else None, + 'knowledge_id': str(knowledge_id) if knowledge_id else None, 'file_id': str(image.id) } file_bytes = image.meta.pop('content') @@ -71,8 +73,9 @@ class BaseDocumentExtractNode(IDocumentExtractNode): FileSerializer(data={ 'file': f, 'meta': meta, - 'source_id': meta['application_id'], - 'source_type': FileSourceType.APPLICATION.value + 'source_id': meta['application_id'] if meta['application_id'] else meta['knowledge_id'], + 'source_type': FileSourceType.APPLICATION.value if meta[ + 'application_id'] else FileSourceType.KNOWLEDGE.value }).upload() document_list = [] diff --git a/apps/application/flow/step_node/document_split_node/impl/base_document_split_node.py b/apps/application/flow/step_node/document_split_node/impl/base_document_split_node.py index f93e3ca50..a8f1aaec5 100644 --- a/apps/application/flow/step_node/document_split_node/impl/base_document_split_node.py +++ b/apps/application/flow/step_node/document_split_node/impl/base_document_split_node.py @@ -4,11 +4,9 @@ import mimetypes from typing import List from django.core.files.uploadedfile import InMemoryUploadedFile -from django.db.models import QuerySet from application.flow.i_step_node import NodeResult from application.flow.step_node.document_split_node.i_document_split_node import IDocumentSplitNode -from knowledge.models import File, FileSourceType from knowledge.serializers.document import default_split_handle, FileBufferHandle @@ -73,18 +71,7 @@ class BaseDocumentSplitNode(IDocumentSplitNode): return NodeResult({'paragraph_list': paragraph_list}, {}) def _save_image(self, image_list): - if image_list is not None and len(image_list) > 0: - exist_image_list = [str(i.get('id')) for i in - QuerySet(File).filter(id__in=[i.id for i in image_list]).values('id')] - save_image_list = [image for image in image_list if not exist_image_list.__contains__(str(image.id))] - save_image_list = list({img.id: img for img in save_image_list}.values()) - # save image - for file in save_image_list: - file_bytes = file.meta.pop('content') - file.meta['knowledge_id'] = self.context.get('knowledge_id') - file.source_type = FileSourceType.KNOWLEDGE - file.source_id = self.context.get('knowledge_id') - file.save(file_bytes) + pass def _process_split_result( self, item, knowledge_id, source_file_id, file_name,