From 5e499e6afa141486fee0c7180337e79737cfa13b Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Tue, 16 Apr 2024 20:59:27 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20PDF=E4=B8=8A=E4=BC=A0=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93=E5=BC=80=E5=A7=8B=E5=AF=BC=E5=85=A5=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E6=8A=A5=E9=94=99=20#122=20(#125)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/common/util/split_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/common/util/split_model.py b/apps/common/util/split_model.py index 16945e0ec..19b265fc6 100644 --- a/apps/common/util/split_model.py +++ b/apps/common/util/split_model.py @@ -336,6 +336,7 @@ class SplitModel: :return: 解析后数据 {content:段落数据,keywords:[‘段落关键词’],parent_chain:['段落父级链路']} """ text = text.replace('\r', '\n') + text = text.replace("\0", '') result_tree = self.parse_to_tree(text, 0) result = result_tree_to_paragraph(result_tree, [], []) return [item for item in [self.post_reset_paragraph(row) for row in result] if