From 59f5c8ac769bedbd39dcc56047fdc06f3f2ebed5 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Wed, 27 Nov 2024 12:17:08 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E6=8F=90=E5=8F=96=E6=8A=A5=E9=94=99=E6=B2=A1=E6=9C=89=E6=98=BE?= =?UTF-8?q?=E7=A4=BA=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../impl/base_document_extract_node.py | 6 ++++-- apps/common/handle/impl/doc_split_handle.py | 2 +- apps/common/handle/impl/html_split_handle.py | 2 +- apps/common/handle/impl/pdf_split_handle.py | 2 +- apps/common/handle/impl/text_split_handle.py | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py index 2b30a9320..4e7b2f660 100644 --- a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py +++ b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py @@ -29,18 +29,20 @@ class BaseDocumentExtractNode(IDocumentExtractNode): # 回到文件头 buffer.seek(0) file_content = split_handle.get_content(buffer) - content.append( '## ' + doc['name'] + '\n' + file_content) + content.append('## ' + doc['name'] + '\n' + file_content) break return NodeResult({'content': splitter.join(content)}, {}) def get_details(self, index: int, **kwargs): + # 不保存content全部内容,因为content内容可能会很大 + content = (self.context.get('content')[:500] + '...') if len(self.context.get('content')) > 0 else '' return { 'name': self.node.properties.get('stepName'), "index": index, 'run_time': self.context.get('run_time'), 'type': self.node.type, - 'content': self.context.get('content')[:500] + '...', # 不保存content全部内容,因为content内容可能会很大 + 'content': content, 'status': self.status, 'err_message': self.err_message, 'document_list': self.context.get('document_list') diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py index 350a3921a..6ac6f43f9 100644 --- a/apps/common/handle/impl/doc_split_handle.py +++ b/apps/common/handle/impl/doc_split_handle.py @@ -198,4 +198,4 @@ class DocSplitHandle(BaseSplitHandle): return self.to_md(doc, image_list, get_image_id_func()) except BaseException as e: traceback.print_exception(e) - return '' \ No newline at end of file + return f'{e}' \ No newline at end of file diff --git a/apps/common/handle/impl/html_split_handle.py b/apps/common/handle/impl/html_split_handle.py index 688904567..bb69e0af0 100644 --- a/apps/common/handle/impl/html_split_handle.py +++ b/apps/common/handle/impl/html_split_handle.py @@ -70,4 +70,4 @@ class HTMLSplitHandle(BaseSplitHandle): return html2text(content) except BaseException as e: traceback.print_exception(e) - return '' \ No newline at end of file + return f'{e}' \ No newline at end of file diff --git a/apps/common/handle/impl/pdf_split_handle.py b/apps/common/handle/impl/pdf_split_handle.py index b759c6d6a..21d243058 100644 --- a/apps/common/handle/impl/pdf_split_handle.py +++ b/apps/common/handle/impl/pdf_split_handle.py @@ -321,4 +321,4 @@ class PdfSplitHandle(BaseSplitHandle): return self.handle_pdf_content(file, pdf_document) except BaseException as e: traceback.print_exception(e) - return '' \ No newline at end of file + return f'{e}' \ No newline at end of file diff --git a/apps/common/handle/impl/text_split_handle.py b/apps/common/handle/impl/text_split_handle.py index 984c4e1e9..1ae22f95f 100644 --- a/apps/common/handle/impl/text_split_handle.py +++ b/apps/common/handle/impl/text_split_handle.py @@ -57,4 +57,4 @@ class TextSplitHandle(BaseSplitHandle): return buffer.decode(detect(buffer)['encoding']) except BaseException as e: traceback.print_exception(e) - return '' \ No newline at end of file + return f'{e}' \ No newline at end of file