mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
chore: 文档内容无法提取的时候输出错误信息
This commit is contained in:
parent
98db08d263
commit
64e8f4dc9f
|
|
@@ -41,4 +41,4 @@ class CsvSplitHandle(BaseParseTableHandle):
|
|||
return buffer.decode(detect(buffer)['encoding'])
|
||||
except BaseException as e:
|
||||
max_kb.error(f'csv split handle error: {e}')
|
||||
return [{'name': file.name, 'paragraphs': []}]
|
||||
return f'error: {e}'
|
||||
|
|
@@ -63,21 +63,26 @@ class XlsSplitHandle(BaseParseTableHandle):
|
|||
|
||||
def get_content(self, file):
    """Render every sheet of an .xls workbook as Markdown tables.

    Args:
        file: Object whose ``read()`` result is handed to
            ``xlrd.open_workbook`` as ``file_contents``.

    Returns:
        One Markdown table per non-empty sheet, each followed by a blank
        line, concatenated into a single string. If anything raises while
        parsing, the error is logged and ``'error: <exception>'`` is
        returned instead.
    """

    def to_text(cell):
        # Only a missing value becomes blank; 0 and False must stay visible.
        # Newlines become <br> so a multi-line cell stays on one table row.
        return '' if cell is None else str(cell).replace('\n', '<br>')

    try:
        # Open the .xls workbook.
        workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
        md_tables = []
        for sheet in workbook.sheets():
            # A sheet without rows has no header row; asking for row 0 would
            # raise and discard the tables of every other sheet.
            if sheet.nrows == 0:
                continue

            # First row is the header, the remaining rows are the data.
            headers = [to_text(cell) for cell in sheet.row_values(0)]
            data = [sheet.row_values(row_idx) for row_idx in range(1, sheet.nrows)]

            # Build the Markdown table: header, separator, then data rows.
            lines = [
                '| ' + ' | '.join(headers) + ' |\n',
                '| ' + ' | '.join(['---'] * len(headers)) + ' |\n',
            ]
            for row in data:
                lines.append('| ' + ' | '.join([to_text(cell) for cell in row]) + ' |\n')
            md_tables.append(''.join(lines) + '\n\n')

        return ''.join(md_tables)
    except Exception as e:
        max_kb.error(f'excel split handle error: {e}')
        return f'error: {e}'
|
||||
|
|
|
|||
|
|
@@ -75,28 +75,32 @@ class XlsxSplitHandle(BaseParseTableHandle):
|
|||
|
||||
|
||||
def get_content(self, file):
    """Render every sheet of an .xlsx workbook as Markdown tables.

    Args:
        file: Object passed directly to ``load_workbook``.

    Returns:
        One Markdown table per non-empty sheet, each followed by a blank
        line, concatenated into a single string. If anything raises while
        parsing, the error is logged and ``'error: <exception>'`` is
        returned instead.
    """

    def to_text(cell):
        # Empty cells become blank text; everything else is stringified so
        # that numbers and other non-string values can be joined.
        # Newlines become <br> so a multi-line cell stays on one table row.
        return '' if cell is None else str(cell).replace('\n', '<br>')

    try:
        # Load the Excel workbook.
        workbook = load_workbook(file)
        md_tables = []
        for sheetname in workbook.sheetnames:
            # Fall back to the active sheet if a sheet name is empty.
            sheet = workbook[sheetname] if sheetname else workbook.active

            # Read all rows of the worksheet as plain values.
            rows = list(sheet.iter_rows(values_only=True))
            if not rows:
                continue

            # First row is the header, the remaining rows are the data.
            headers = [to_text(cell) for cell in rows[0]]
            data = rows[1:]

            # Build the Markdown table: header, separator, then data rows.
            lines = [
                '| ' + ' | '.join(headers) + ' |\n',
                '| ' + ' | '.join(['---'] * len(headers)) + ' |\n',
            ]
            for row in data:
                lines.append('| ' + ' | '.join([to_text(cell) for cell in row]) + ' |\n')
            md_tables.append(''.join(lines) + '\n\n')

        return ''.join(md_tables)
    except Exception as e:
        max_kb.error(f'excel split handle error: {e}')
        return f'error: {e}'
|
||||
|
|
|
|||
Loading…
Reference in New Issue