diff --git a/apps/common/handle/impl/qa/csv_parse_qa_handle.py b/apps/common/handle/impl/qa/csv_parse_qa_handle.py index 75c22cbda..e264ea391 100644 --- a/apps/common/handle/impl/qa/csv_parse_qa_handle.py +++ b/apps/common/handle/impl/qa/csv_parse_qa_handle.py @@ -8,10 +8,12 @@ """ import csv import io +import traceback from charset_normalizer import detect from common.handle.base_parse_qa_handle import BaseParseQAHandle, get_title_row_index_dict, get_row_value +from common.utils.logger import maxkb_logger def read_csv_standard(file_path): @@ -56,4 +58,5 @@ class CsvParseQAHandle(BaseParseQAHandle): 'problem_list': problem_list}) return [{'name': file.name, 'paragraphs': paragraph_list}] except Exception as e: + maxkb_logger.error(f"Error processing CSV file {file.name}: {e}, {traceback.format_exc()}") return [{'name': file.name, 'paragraphs': []}] diff --git a/apps/common/handle/impl/qa/xls_parse_qa_handle.py b/apps/common/handle/impl/qa/xls_parse_qa_handle.py index 06edb1fb3..6032be46e 100644 --- a/apps/common/handle/impl/qa/xls_parse_qa_handle.py +++ b/apps/common/handle/impl/qa/xls_parse_qa_handle.py @@ -6,10 +6,12 @@ @date:2024/5/21 14:59 @desc: """ +import traceback import xlrd from common.handle.base_parse_qa_handle import BaseParseQAHandle, get_title_row_index_dict, get_row_value +from common.utils.logger import maxkb_logger def handle_sheet(file_name, sheet): @@ -58,4 +60,5 @@ class XlsParseQAHandle(BaseParseQAHandle): sheet.name, sheet) for sheet in worksheets] if row is not None] except Exception as e: + maxkb_logger.error(f"Error processing XLS file {file.name}: {e}, {traceback.format_exc()}") return [{'name': file.name, 'paragraphs': []}] diff --git a/apps/common/handle/impl/qa/xlsx_parse_qa_handle.py b/apps/common/handle/impl/qa/xlsx_parse_qa_handle.py index 287fb9cda..4adf2ff48 100644 --- a/apps/common/handle/impl/qa/xlsx_parse_qa_handle.py +++ b/apps/common/handle/impl/qa/xlsx_parse_qa_handle.py @@ -7,11 +7,13 @@ @desc: """ import io +import 
traceback import openpyxl from common.handle.base_parse_qa_handle import BaseParseQAHandle, get_title_row_index_dict, get_row_value from common.handle.impl.common_handle import xlsx_embed_cells_images +from common.utils.logger import maxkb_logger def handle_sheet(file_name, sheet, image_dict): @@ -69,4 +71,5 @@ class XlsxParseQAHandle(BaseParseQAHandle): sheet.title, sheet, image_dict) for sheet in worksheets] if row is not None] except Exception as e: + maxkb_logger.error(f"Error processing XLSX file {file.name}: {e}, {traceback.format_exc()}") return [{'name': file.name, 'paragraphs': []}] diff --git a/apps/common/handle/impl/table/csv_parse_table_handle.py b/apps/common/handle/impl/table/csv_parse_table_handle.py index e28f3d5f3..27853f559 100644 --- a/apps/common/handle/impl/table/csv_parse_table_handle.py +++ b/apps/common/handle/impl/table/csv_parse_table_handle.py @@ -1,5 +1,6 @@ # coding=utf-8 import logging +import traceback from charset_normalizer import detect @@ -19,7 +20,7 @@ class CsvParseTableHandle(BaseParseTableHandle): try: content = buffer.decode(detect(buffer)['encoding']) except BaseException as e: - maxkb_logger.error(f'csv split handle error: {e}') + maxkb_logger.error(f"Error processing CSV file {file.name}: {e}, {traceback.format_exc()}") return [{'name': file.name, 'paragraphs': []}] csv_model = content.split('\n') diff --git a/apps/common/handle/impl/table/xls_parse_table_handle.py b/apps/common/handle/impl/table/xls_parse_table_handle.py index 780a6eae9..74a5a2df6 100644 --- a/apps/common/handle/impl/table/xls_parse_table_handle.py +++ b/apps/common/handle/impl/table/xls_parse_table_handle.py @@ -1,5 +1,6 @@ # coding=utf-8 import logging +import traceback import xlrd @@ -55,7 +56,7 @@ class XlsParseTableHandle(BaseParseTableHandle): result.append({'name': sheet.name, 'paragraphs': paragraphs}) except BaseException as e: - maxkb_logger.error(f'excel split handle error: {e}') + maxkb_logger.error(f"Error processing XLS file {file.name}: 
{e}, {traceback.format_exc()}") return [{'name': file.name, 'paragraphs': []}] return result diff --git a/apps/common/handle/impl/table/xlsx_parse_table_handle.py b/apps/common/handle/impl/table/xlsx_parse_table_handle.py index 4ab214b8a..155c8266a 100644 --- a/apps/common/handle/impl/table/xlsx_parse_table_handle.py +++ b/apps/common/handle/impl/table/xlsx_parse_table_handle.py @@ -1,6 +1,7 @@ # coding=utf-8 import io import logging +import traceback from openpyxl import load_workbook @@ -73,7 +74,7 @@ class XlsxParseTableHandle(BaseParseTableHandle): result.append({'name': sheetname, 'paragraphs': paragraphs}) except BaseException as e: - maxkb_logger.error(f'excel split handle error: {e}') + maxkb_logger.error(f"Error processing XLSX file {file.name}: {e}, {traceback.format_exc()}") return [{'name': file.name, 'paragraphs': []}] return result diff --git a/apps/common/handle/impl/text/csv_split_handle.py b/apps/common/handle/impl/text/csv_split_handle.py index 3ea690e0e..e950ea395 100644 --- a/apps/common/handle/impl/text/csv_split_handle.py +++ b/apps/common/handle/impl/text/csv_split_handle.py @@ -9,11 +9,13 @@ import csv import io import os +import traceback from typing import List from charset_normalizer import detect from common.handle.base_split_handle import BaseSplitHandle +from common.utils.logger import maxkb_logger def post_cell(cell_value): @@ -60,6 +62,7 @@ class CsvSplitHandle(BaseSplitHandle): paragraphs.append({'content': result_item_content, 'title': ''}) return result except Exception as e: + maxkb_logger.error(f"Error processing CSV file {file.name}: {e}, {traceback.format_exc()}") return result def get_content(self, file, save_image): diff --git a/apps/common/handle/impl/text/doc_split_handle.py b/apps/common/handle/impl/text/doc_split_handle.py index 2576c121d..893c2dd1b 100644 --- a/apps/common/handle/impl/text/doc_split_handle.py +++ b/apps/common/handle/impl/text/doc_split_handle.py @@ -7,7 +7,6 @@ @desc: """ import io -import logging 
import os import re import traceback @@ -155,7 +154,7 @@ class DocSplitHandle(BaseSplitHandle): return title except Exception as e: - traceback.print_exc() + maxkb_logger.error(f"Error processing DOC file: {e}, {traceback.format_exc()}") return paragraph.text return get_paragraph_txt(paragraph, doc, images_list, get_image_id) @@ -207,12 +206,15 @@ class DocSplitHandle(BaseSplitHandle): else: split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit) except BaseException as e: - traceback.print_exception(e) - return {'name': file_name, - 'content': []} - return {'name': file_name, - 'content': split_model.parse(content) - } + maxkb_logger.error(f"Error processing DOC file {file.name}: {e}, {traceback.format_exc()}") + return { + 'name': file_name, + 'content': [] + } + return { + 'name': file_name, + 'content': split_model.parse(content) + } def support(self, file, get_buffer): file_name: str = file.name.lower() diff --git a/apps/common/handle/impl/text/html_split_handle.py b/apps/common/handle/impl/text/html_split_handle.py index 33c7aa7c8..78caf3d40 100644 --- a/apps/common/handle/impl/text/html_split_handle.py +++ b/apps/common/handle/impl/text/html_split_handle.py @@ -15,6 +15,7 @@ from charset_normalizer import detect from html2text import html2text from common.handle.base_split_handle import BaseSplitHandle +from common.utils.logger import maxkb_logger from common.utils.split_model import SplitModel default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), @@ -55,11 +56,15 @@ class HTMLSplitHandle(BaseSplitHandle): content = buffer.decode(encoding) content = html2text(content) except BaseException as e: - return {'name': file.name, - 'content': []} - return {'name': file.name, - 'content': split_model.parse(content) - } + maxkb_logger.error(f"Error processing HTML file {file.name}: {e}, {traceback.format_exc()}") + + return { + 'name': file.name, 'content': [] + } + return { + 'name': file.name, + 'content': 
split_model.parse(content) + } def get_content(self, file, save_image): buffer = file.read() diff --git a/apps/common/handle/impl/text/pdf_split_handle.py b/apps/common/handle/impl/text/pdf_split_handle.py index c5fef5197..f7e41eb2d 100644 --- a/apps/common/handle/impl/text/pdf_split_handle.py +++ b/apps/common/handle/impl/text/pdf_split_handle.py @@ -6,7 +6,6 @@ @date:2024/3/27 18:19 @desc: """ -import logging import os import re import tempfile @@ -31,7 +30,6 @@ default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), re.compile("(?