MaxKB/apps/common/handle/impl/table/xls_parse_table_handle.py
2024-09-12 10:49:31 +08:00

48 lines
1.6 KiB
Python

# coding=utf-8
import logging
import xlrd
from common.handle.base_parse_table_handle import BaseParseTableHandle
# Logger registered under the name "max_kb"; the handler below uses it to
# report workbook parsing failures.
max_kb = logging.getLogger("max_kb")
class XlsSplitHandle(BaseParseTableHandle):
    """Turn a legacy ``.xls`` workbook into one paragraph list per sheet.

    The first row of every sheet is treated as the header row. Each later
    row becomes a single paragraph whose content is a ``"; "``-joined list
    of ``"<header>: <value>"`` pairs for the non-blank cells of that row.
    """

    def support(self, file, get_buffer):
        """Return True when this handler can parse *file*.

        :param file: object exposing a ``name`` attribute (the file name).
        :param get_buffer: callable that returns the raw content of *file*.
        :return: True if the name ends with ``.xls`` (case-insensitive) and
            ``xlrd.inspect_format`` reports a format for the content.
        """
        file_name: str = file.name.lower()
        # Check the cheap extension test first so the buffer is only
        # requested for files that might actually be handled here.
        if not file_name.endswith(".xls"):
            return False
        return bool(xlrd.inspect_format(content=get_buffer(file)))

    @staticmethod
    def _format_row(header, row):
        """Render one data row as ``"<header>: <value>; ..."`` text."""
        parts = []
        for index, cell in enumerate(row):
            # Skip blank cells only. A plain truthiness test would also
            # discard legitimate zero values such as 0 or 0.0.
            if cell is None or cell == "":
                continue
            title = str(header[index]) if index < len(header) else ""
            parts.append(title + ": " + str(cell) if title else str(cell))
        return "; ".join(parts)

    def handle(self, file, get_buffer, save_image):
        """Parse every sheet of the workbook into paragraphs.

        :param file: object exposing a ``name`` attribute; the name is used
            for the fallback result when parsing fails.
        :param get_buffer: callable that returns the raw content of *file*.
        :param save_image: not used by this handler; accepted so the
            signature stays compatible with existing callers.
        :return: list of ``{'name': <sheet name>, 'paragraphs': [...]}``
            dicts, one per non-empty sheet, where each paragraph is
            ``{'title': '', 'content': <row text>}``. If parsing raises,
            the error is logged and a single entry named after the file
            with no paragraphs is returned instead.
        """
        buffer = get_buffer(file)
        try:
            workbook = xlrd.open_workbook(file_contents=buffer)
            result = []
            for sheet in workbook.sheets():
                # Emptiness has to be tested on the row count: an iterator
                # object is always truthy, and calling next() on an empty
                # one raises StopIteration, which previously aborted the
                # parsing of the whole workbook.
                if sheet.nrows == 0:
                    continue
                header = sheet.row_values(0)
                # Sheets whose name does not contain "sheet" get the name
                # appended to every row's content.
                append_name = "sheet" not in sheet.name.lower()
                paragraphs = []
                for row_index in range(1, sheet.nrows):
                    content = self._format_row(
                        header, sheet.row_values(row_index))
                    if append_name:
                        content += " ——" + sheet.name
                    paragraphs.append({'title': '', 'content': content})
                result.append({'name': sheet.name, 'paragraphs': paragraphs})
        except Exception as e:
            # Best-effort parsing: log and return an empty result for the
            # file. KeyboardInterrupt / SystemExit are deliberately not
            # swallowed here.
            max_kb.error(f'excel split handle error: {e}')
            return [{'name': file.name, 'paragraphs': []}]
        return result