From b90995d3aa56a5aee15c7b74a1cf5b4a202d17c8 Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Sat, 8 Feb 2025 15:02:49 +0800 Subject: [PATCH] fix: defect of incorrect document names after importing CSV and docx files into the knowledge base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1052039 --user=王孝刚 【知识库】-压缩文件中包含csv、docx文件时,导入到知识库后,文档名称包含文件夹名称 https://www.tapd.cn/57709429/s/1651752 --- apps/common/handle/impl/csv_split_handle.py | 4 +++- apps/common/handle/impl/doc_split_handle.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/common/handle/impl/csv_split_handle.py b/apps/common/handle/impl/csv_split_handle.py index 11dbdc785..6a4849c67 100644 --- a/apps/common/handle/impl/csv_split_handle.py +++ b/apps/common/handle/impl/csv_split_handle.py @@ -8,6 +8,7 @@ """ import csv import io +import os from typing import List from charset_normalizer import detect @@ -28,7 +29,8 @@ class CsvSplitHandle(BaseSplitHandle): def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image): buffer = get_buffer(file) paragraphs = [] - result = {'name': file.name, 'content': paragraphs} + file_name = os.path.basename(file.name) + result = {'name': file_name, 'content': paragraphs} try: reader = csv.reader(io.TextIOWrapper(io.BytesIO(buffer), encoding=detect(buffer)['encoding'])) try: diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py index d97a8e45b..753e74fc4 100644 --- a/apps/common/handle/impl/doc_split_handle.py +++ b/apps/common/handle/impl/doc_split_handle.py @@ -7,6 +7,7 @@ @desc: """ import io +import os import re import traceback import uuid @@ -167,6 +168,7 @@ class DocSplitHandle(BaseSplitHandle): in elements]) def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image): + file_name = os.path.basename(file.name) try: image_list = [] buffer = get_buffer(file) @@ -180,9 +182,9 @@ class DocSplitHandle(BaseSplitHandle): split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit) except BaseException as e: traceback.print_exception(e) - return {'name': file.name, + return {'name': file_name, 'content': []} - return {'name': file.name, + return {'name': file_name, 'content': split_model.parse(content) }