mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
fix: limit chapter title length to 256 characters in pdf_split_handle.py
--bug=1054363 --user=刘瑞斌 【知识库】导入PDF文档,分段标题长度超长时,没有自动截断 https://www.tapd.cn/57709429/s/1681044
This commit is contained in:
parent
675adeeb63
commit
560890f717
|
|
@ -173,14 +173,15 @@ class PdfSplitHandle(BaseSplitHandle):
|
|||
|
||||
# Null characters are not allowed.
|
||||
chapter_text = chapter_text.replace('\0', '')
|
||||
|
||||
# 限制标题长度
|
||||
real_chapter_title = chapter_title[:256]
|
||||
# 限制章节内容长度
|
||||
if 0 < limit < len(chapter_text):
|
||||
split_text = PdfSplitHandle.split_text(chapter_text, limit)
|
||||
for text in split_text:
|
||||
chapters.append({"title": chapter_title, "content": text})
|
||||
chapters.append({"title": real_chapter_title, "content": text})
|
||||
else:
|
||||
chapters.append({"title": chapter_title, "content": chapter_text if chapter_text else chapter_title})
|
||||
chapters.append({"title": real_chapter_title, "content": chapter_text if chapter_text else real_chapter_title})
|
||||
# 保存章节内容和章节标题
|
||||
return chapters
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue