diff --git a/apps/common/handle/impl/table/xlsx_parse_table_handle.py b/apps/common/handle/impl/table/xlsx_parse_table_handle.py index 2ae22d019..a98c6ae75 100644 --- a/apps/common/handle/impl/table/xlsx_parse_table_handle.py +++ b/apps/common/handle/impl/table/xlsx_parse_table_handle.py @@ -78,28 +78,34 @@ class XlsxSplitHandle(BaseParseTableHandle): try: # 加载 Excel 文件 workbook = load_workbook(file) + try: + image_dict: dict = xlsx_embed_cells_images(file) + if len(image_dict) > 0: + save_image(image_dict.values()) + except Exception as e: + print(f'{e}') + image_dict = {} md_tables = '' # 如果未指定 sheet_name,则使用第一个工作表 for sheetname in workbook.sheetnames: sheet = workbook[sheetname] if sheetname else workbook.active - - # 获取工作表的所有行 - rows = list(sheet.iter_rows(values_only=True)) - if not rows: - continue + rows = self.fill_merged_cells(sheet, image_dict) # 提取表头和内容 - headers = rows[0] - data = rows[1:] + + headers = [f"{key}" for key, value in rows[0].items()] # 构建 Markdown 表格 md_table = '| ' + ' | '.join(headers) + ' |\n' md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n' - for row in data: + for row in rows: + r = [f'{value}' for key, value in row.items()] md_table += '| ' + ' | '.join( - [str(cell).replace('\n', '
') if cell is not None else '' for cell in row]) + ' |\n' + [str(cell).replace('\n', '
') if cell is not None else '' for cell in r]) + ' |\n' md_tables += md_table + '\n\n' + + md_tables = md_tables.replace('/api/image/', '/api/file/') return md_tables except Exception as e: max_kb.error(f'excel split handle error: {e}')