fix: 修复文档提取报错没有显示的问题

This commit is contained in:
CaptainB 2024-11-27 12:17:08 +08:00 committed by 刘瑞斌
parent 33ed8aa4ae
commit 59f5c8ac76
5 changed files with 8 additions and 6 deletions

View File

@ -29,18 +29,20 @@ class BaseDocumentExtractNode(IDocumentExtractNode):
# 回到文件头
buffer.seek(0)
file_content = split_handle.get_content(buffer)
content.append( '## ' + doc['name'] + '\n' + file_content)
content.append('## ' + doc['name'] + '\n' + file_content)
break
return NodeResult({'content': splitter.join(content)}, {})
def get_details(self, index: int, **kwargs):
# 不保存content全部内容因为content内容可能会很大
content = (self.context.get('content')[:500] + '...') if len(self.context.get('content')) > 0 else ''
return {
'name': self.node.properties.get('stepName'),
"index": index,
'run_time': self.context.get('run_time'),
'type': self.node.type,
'content': self.context.get('content')[:500] + '...', # 不保存content全部内容因为content内容可能会很大
'content': content,
'status': self.status,
'err_message': self.err_message,
'document_list': self.context.get('document_list')

View File

@ -198,4 +198,4 @@ class DocSplitHandle(BaseSplitHandle):
return self.to_md(doc, image_list, get_image_id_func())
except BaseException as e:
traceback.print_exception(e)
return ''
return f'{e}'

View File

@ -70,4 +70,4 @@ class HTMLSplitHandle(BaseSplitHandle):
return html2text(content)
except BaseException as e:
traceback.print_exception(e)
return ''
return f'{e}'

View File

@ -321,4 +321,4 @@ class PdfSplitHandle(BaseSplitHandle):
return self.handle_pdf_content(file, pdf_document)
except BaseException as e:
traceback.print_exception(e)
return ''
return f'{e}'

View File

@ -57,4 +57,4 @@ class TextSplitHandle(BaseSplitHandle):
return buffer.decode(detect(buffer)['encoding'])
except BaseException as e:
traceback.print_exception(e)
return ''
return f'{e}'