From b90995d3aa56a5aee15c7b74a1cf5b4a202d17c8 Mon Sep 17 00:00:00 2001
From: wxg0103 <727495428@qq.com>
Date: Sat, 8 Feb 2025 15:02:49 +0800
Subject: [PATCH] fix: strip folder path from document names when importing CSV
 and docx files into the knowledge base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

--bug=1052039 --user=王孝刚 【知识库】-压缩文件中包含csv、docx文件时，导入到知识库后，文档名称包含文件夹名称 https://www.tapd.cn/57709429/s/1651752
---
 apps/common/handle/impl/csv_split_handle.py | 4 +++-
 apps/common/handle/impl/doc_split_handle.py | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/apps/common/handle/impl/csv_split_handle.py b/apps/common/handle/impl/csv_split_handle.py
index 11dbdc785..6a4849c67 100644
--- a/apps/common/handle/impl/csv_split_handle.py
+++ b/apps/common/handle/impl/csv_split_handle.py
@@ -8,6 +8,7 @@
 """
 import csv
 import io
+import os
 from typing import List
 
 from charset_normalizer import detect
@@ -28,7 +29,8 @@ class CsvSplitHandle(BaseSplitHandle):
     def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
         buffer = get_buffer(file)
         paragraphs = []
-        result = {'name': file.name, 'content': paragraphs}
+        file_name = os.path.basename(file.name)
+        result = {'name': file_name, 'content': paragraphs}
         try:
             reader = csv.reader(io.TextIOWrapper(io.BytesIO(buffer), encoding=detect(buffer)['encoding']))
             try:
diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py
index d97a8e45b..753e74fc4 100644
--- a/apps/common/handle/impl/doc_split_handle.py
+++ b/apps/common/handle/impl/doc_split_handle.py
@@ -7,6 +7,7 @@
     @desc:
 """
 import io
+import os
 import re
 import traceback
 import uuid
@@ -167,6 +168,7 @@ class DocSplitHandle(BaseSplitHandle):
              in elements])
 
     def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
+        file_name = os.path.basename(file.name)
         try:
             image_list = []
             buffer = get_buffer(file)
@@ -180,9 +182,9 @@ class DocSplitHandle(BaseSplitHandle):
                 split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit)
         except BaseException as e:
             traceback.print_exception(e)
-            return {'name': file.name,
+            return {'name': file_name,
                     'content': []}
-        return {'name': file.name,
+        return {'name': file_name,
                 'content': split_model.parse(content)
                 }