From 560890f71724523022bb6875f5d9301fe8ed5ae8 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Mon, 7 Apr 2025 10:53:11 +0800 Subject: [PATCH] fix: limit chapter title length to 256 characters in pdf_split_handle.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1054363 --user=刘瑞斌 【知识库】导入PDF文档,分段标题长度超长时,没有自动截断 https://www.tapd.cn/57709429/s/1681044 --- apps/common/handle/impl/pdf_split_handle.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/common/handle/impl/pdf_split_handle.py b/apps/common/handle/impl/pdf_split_handle.py index 176b457db..abdac5e19 100644 --- a/apps/common/handle/impl/pdf_split_handle.py +++ b/apps/common/handle/impl/pdf_split_handle.py @@ -173,14 +173,15 @@ class PdfSplitHandle(BaseSplitHandle): # Null characters are not allowed. chapter_text = chapter_text.replace('\0', '') - + # 限制标题长度 + real_chapter_title = chapter_title[:256] # 限制章节内容长度 if 0 < limit < len(chapter_text): split_text = PdfSplitHandle.split_text(chapter_text, limit) for text in split_text: - chapters.append({"title": chapter_title, "content": text}) + chapters.append({"title": real_chapter_title, "content": text}) else: - chapters.append({"title": chapter_title, "content": chapter_text if chapter_text else chapter_title}) + chapters.append({"title": real_chapter_title, "content": chapter_text if chapter_text else real_chapter_title}) # 保存章节内容和章节标题 return chapters