diff --git a/apps/common/handle/impl/table/csv_parse_table_handle.py b/apps/common/handle/impl/table/csv_parse_table_handle.py
index c3a85db86..dcd971839 100644
--- a/apps/common/handle/impl/table/csv_parse_table_handle.py
+++ b/apps/common/handle/impl/table/csv_parse_table_handle.py
@@ -41,4 +41,4 @@ class CsvSplitHandle(BaseParseTableHandle):
             return buffer.decode(detect(buffer)['encoding'])
         except BaseException as e:
             max_kb.error(f'csv split handle error: {e}')
-            return [{'name': file.name, 'paragraphs': []}]
\ No newline at end of file
+            return f'error: {e}'
\ No newline at end of file
diff --git a/apps/common/handle/impl/table/xls_parse_table_handle.py b/apps/common/handle/impl/table/xls_parse_table_handle.py
index a3ef14443..0fee4e35b 100644
--- a/apps/common/handle/impl/table/xls_parse_table_handle.py
+++ b/apps/common/handle/impl/table/xls_parse_table_handle.py
@@ -63,21 +63,26 @@ class XlsSplitHandle(BaseParseTableHandle):
 
     def get_content(self, file):
         # 打开 .xls 文件
-        workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
-        sheets = workbook.sheets()
-        md_tables = ''
-        for sheet in sheets:
+        try:
+            workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
+            sheets = workbook.sheets()
+            md_tables = ''
+            for sheet in sheets:
 
-            # 获取表头和内容
-            headers = sheet.row_values(0)
-            data = [sheet.row_values(row_idx) for row_idx in range(1, sheet.nrows)]
+                # 获取表头和内容
+                headers = sheet.row_values(0)
+                data = [sheet.row_values(row_idx) for row_idx in range(1, sheet.nrows)]
 
-            # 构建 Markdown 表格
-            md_table = '| ' + ' | '.join(headers) + ' |\n'
-            md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
-            for row in data:
-                # 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
-                md_table += '| ' + ' | '.join([str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
-            md_tables += md_table + '\n\n'
+                # 构建 Markdown 表格
+                md_table = '| ' + ' | '.join(headers) + ' |\n'
+                md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
+                for row in data:
+                    # 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
+                    md_table += '| ' + ' | '.join(
+                        [str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
+                md_tables += md_table + '\n\n'
 
-        return md_tables
+            return md_tables
+        except Exception as e:
+            max_kb.error(f'excel split handle error: {e}')
+            return f'error: {e}'
diff --git a/apps/common/handle/impl/table/xlsx_parse_table_handle.py b/apps/common/handle/impl/table/xlsx_parse_table_handle.py
index e92d3c11a..3fd40b2d1 100644
--- a/apps/common/handle/impl/table/xlsx_parse_table_handle.py
+++ b/apps/common/handle/impl/table/xlsx_parse_table_handle.py
@@ -75,28 +75,32 @@ class XlsxSplitHandle(BaseParseTableHandle):
 
 
     def get_content(self, file):
-        # 加载 Excel 文件
-        workbook = load_workbook(file)
-        md_tables = ''
-        # 如果未指定 sheet_name,则使用第一个工作表
-        for sheetname in workbook.sheetnames:
-            sheet = workbook[sheetname] if sheetname else workbook.active
+        try:
+            # 加载 Excel 文件
+            workbook = load_workbook(file)
+            md_tables = ''
+            # 如果未指定 sheet_name,则使用第一个工作表
+            for sheetname in workbook.sheetnames:
+                sheet = workbook[sheetname] if sheetname else workbook.active
 
-            # 获取工作表的所有行
-            rows = list(sheet.iter_rows(values_only=True))
-            if not rows:
-                continue
+                # 获取工作表的所有行
+                rows = list(sheet.iter_rows(values_only=True))
+                if not rows:
+                    continue
 
-            # 提取表头和内容
-            headers = rows[0]
-            data = rows[1:]
+                # 提取表头和内容
+                headers = rows[0]
+                data = rows[1:]
 
-            # 构建 Markdown 表格
-            md_table = '| ' + ' | '.join(headers) + ' |\n'
-            md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
-            for row in data:
-                md_table += '| ' + ' | '.join(
-                    [str(cell).replace('\n', '<br>') if cell is not None else '' for cell in row]) + ' |\n'
+                # 构建 Markdown 表格
+                md_table = '| ' + ' | '.join(headers) + ' |\n'
+                md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
+                for row in data:
+                    md_table += '| ' + ' | '.join(
+                        [str(cell).replace('\n', '<br>') if cell is not None else '' for cell in row]) + ' |\n'
 
-            md_tables += md_table + '\n\n'
-        return md_tables
\ No newline at end of file
+                md_tables += md_table + '\n\n'
+            return md_tables
+        except Exception as e:
+            max_kb.error(f'excel split handle error: {e}')
+            return f'error: {e}'