MaxKB/apps/common/handle/impl/table/csv_parse_table_handle.py
CaptainB 76f63642e5 fix: 修复导入csv空行没有过滤的问题
--bug=1047841 --user=刘瑞斌 【知识库】上传csv格式的表格模版,第一行标题导入后分段显示不全 https://www.tapd.cn/57709429/s/1597113
2024-10-24 11:13:26 +08:00

37 lines
1.1 KiB
Python

# coding=utf-8
import csv
import io
import logging

from charset_normalizer import detect

from common.handle.base_parse_table_handle import BaseParseTableHandle
# Module-level logger obtained under the name "max_kb"; used below to report parse failures.
max_kb = logging.getLogger("max_kb")
class CsvSplitHandle(BaseParseTableHandle):
    """Table handler that converts a ``.csv`` upload into one paragraph per data row."""

    def support(self, file, get_buffer):
        """Return True when ``file.name`` ends with ``.csv`` (case-insensitive).

        ``get_buffer`` is not used by this implementation.
        """
        file_name: str = file.name.lower()
        return file_name.endswith(".csv")

    def handle(self, file, get_buffer, save_image):
        """Parse the CSV bytes of ``file`` into paragraphs.

        The first non-blank row is treated as the header. Every following
        non-blank row becomes one paragraph whose content is
        ``"header:value; header:value; ..."``. Cells beyond the shorter of the
        header/row are dropped (``zip`` semantics).

        Args:
            file: Uploaded file object; only ``file.name`` is read here.
            get_buffer: Callable returning the raw bytes for ``file``.
            save_image: Not used by this implementation.

        Returns:
            A single-element list ``[{'name': file.name, 'paragraphs': [...]}]``
            where each paragraph is ``{'title': '', 'content': <str>}``. The
            paragraph list is empty when the bytes cannot be decoded or parsed.
        """
        buffer = get_buffer(file)
        try:
            # detect() may report no encoding at all; fall back to UTF-8
            # instead of calling decode(None).
            encoding = detect(buffer)['encoding'] or 'utf-8'
            content = buffer.decode(encoding)
        except Exception as e:
            max_kb.error(f'csv split handle error: {e}')
            return [{'name': file.name, 'paragraphs': []}]

        # A byte-order mark left in the text would become part of the first header name.
        if content.startswith('\ufeff'):
            content = content[1:]

        try:
            # csv.reader copes with quoted fields (embedded commas/newlines) and
            # with \r\n line endings, which a manual split on '\n' and ',' does not.
            # newline='' keeps embedded line breaks intact for the reader.
            reader = csv.reader(io.StringIO(content, newline=''))
            # Drop blank rows up front, including rows made only of delimiters
            # or whitespace (e.g. ",,,"), so they neither become the header
            # nor produce empty paragraphs.
            rows = [row for row in reader if any(cell.strip() for cell in row)]
        except csv.Error as e:
            max_kb.error(f'csv split handle error: {e}')
            return [{'name': file.name, 'paragraphs': []}]

        if not rows:
            return [{'name': file.name, 'paragraphs': []}]

        # The first remaining row is the header.
        title, *data_rows = rows
        paragraphs = [
            {
                'title': '',
                'content': '; '.join(f'{key}:{value}' for key, value in zip(title, row)),
            }
            for row in data_rows
        ]
        return [{'name': file.name, 'paragraphs': paragraphs}]