From 758f424000e8700cde4c76391af9c01a112ac1b7 Mon Sep 17 00:00:00 2001
From: CaptainB <bin@fit2cloud.com>
Date: Wed, 25 Jun 2025 10:53:43 +0800
Subject: [PATCH] fix: replace get_byte() with get_bytes() for consistency in
 file handling

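---
Note: besides the get_byte() -> get_bytes() rename named in the subject,
this patch also changes download_source_file() in
apps/knowledge/serializers/document.py: the File is now looked up by
source_id=document_id, and its id is passed to FileSerializer.Operate,
instead of passing knowledge_id as the file id.
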
 .../document_extract_node/impl/base_document_extract_node.py  | 2 +-
 .../impl/base_image_understand_node.py                        | 4 ++--
 .../speech_to_text_step_node/impl/base_speech_to_text_node.py | 2 +-
 apps/knowledge/serializers/document.py                        | 3 ++-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
index e538fcefd..4d52d2d1f 100644
--- a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
+++ b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
@@ -66,7 +66,7 @@ class BaseDocumentExtractNode(IDocumentExtractNode):
 
         for doc in document:
             file = QuerySet(File).filter(id=doc['file_id']).first()
-            buffer = io.BytesIO(file.get_byte().tobytes())
+            buffer = io.BytesIO(file.get_bytes().tobytes())
             buffer.name = doc['name']  # this is the important line
 
             for split_handle in (parse_table_handle_list + split_handles):
diff --git a/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py b/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py
index b4813cc12..c8e3723ba 100644
--- a/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py
+++ b/apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py
@@ -59,7 +59,7 @@ def write_context(node_variable: Dict, workflow_variable: Dict, node: INode, wor
 
 def file_id_to_base64(file_id: str):
     file = QuerySet(File).filter(id=file_id).first()
-    file_bytes = file.get_byte()
+    file_bytes = file.get_bytes()
     base64_image = base64.b64encode(file_bytes).decode("utf-8")
     return [base64_image, what(None, file_bytes.tobytes())]
 
@@ -171,7 +171,7 @@ class BaseImageUnderstandNode(IImageUnderstandNode):
             for img in image:
                 file_id = img['file_id']
                 file = QuerySet(File).filter(id=file_id).first()
-                image_bytes = file.get_byte()
+                image_bytes = file.get_bytes()
                 base64_image = base64.b64encode(image_bytes).decode("utf-8")
                 image_format = what(None, image_bytes.tobytes())
                 images.append(
diff --git a/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py b/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py
index 1ddbc9fd5..c533a30c5 100644
--- a/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py
+++ b/apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py
@@ -30,7 +30,7 @@ class BaseSpeechToTextNode(ISpeechToTextNode):
             # 根据file_name 吧文件转成mp3格式
             file_format = file.file_name.split('.')[-1]
             with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_format}') as temp_file:
-                temp_file.write(file.get_byte().tobytes())
+                temp_file.write(file.get_bytes().tobytes())
                 temp_file_path = temp_file.name
             with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_amr_file:
                 temp_mp3_path = temp_amr_file.name
diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py
index b3ecfb517..fc2d799fd 100644
--- a/apps/knowledge/serializers/document.py
+++ b/apps/knowledge/serializers/document.py
@@ -558,7 +558,8 @@ class DocumentSerializers(serializers.Serializer):
 
         def download_source_file(self):
             self.is_valid(raise_exception=True)
-            return FileSerializer.Operate(id=self.data.get('knowledge_id')).get(with_valid=True)
+            file = QuerySet(File).filter(source_id=self.data.get('document_id')).first()
+            return FileSerializer.Operate(id=file.id).get(with_valid=True)
 
         def one(self, with_valid=False):
             if with_valid: