diff --git a/apps/common/handle/impl/qa/zip_parse_qa_handle.py b/apps/common/handle/impl/qa/zip_parse_qa_handle.py index d00bc14dd..af44b809c 100644 --- a/apps/common/handle/impl/qa/zip_parse_qa_handle.py +++ b/apps/common/handle/impl/qa/zip_parse_qa_handle.py @@ -9,12 +9,12 @@ import io import os import re -import uuid_utils.compat as uuid import zipfile from typing import List from urllib.parse import urljoin -from django.db.models import QuerySet +import uuid_utils.compat as uuid +from django.utils.translation import gettext_lazy as _ from common.handle.base_parse_qa_handle import BaseParseQAHandle from common.handle.impl.qa.csv_parse_qa_handle import CsvParseQAHandle @@ -22,7 +22,6 @@ from common.handle.impl.qa.xls_parse_qa_handle import XlsParseQAHandle from common.handle.impl.qa.xlsx_parse_qa_handle import XlsxParseQAHandle from common.utils.common import parse_md_image from knowledge.models import File -from django.utils.translation import gettext_lazy as _ class FileBufferHandle: diff --git a/apps/common/handle/impl/table/csv_parse_table_handle.py b/apps/common/handle/impl/table/csv_parse_table_handle.py index e2fc7ce86..4971c424f 100644 --- a/apps/common/handle/impl/table/csv_parse_table_handle.py +++ b/apps/common/handle/impl/table/csv_parse_table_handle.py @@ -15,7 +15,7 @@ class CsvSplitHandle(BaseParseTableHandle): return True return False - def handle(self, file, get_buffer,save_image): + def handle(self, file, get_buffer, save_image): buffer = get_buffer(file) try: content = buffer.decode(detect(buffer)['encoding']) @@ -41,4 +41,4 @@ class CsvSplitHandle(BaseParseTableHandle): return buffer.decode(detect(buffer)['encoding']) except BaseException as e: max_kb.error(f'csv split handle error: {e}') - return f'error: {e}' \ No newline at end of file + return f'error: {e}' diff --git a/apps/common/handle/impl/table/xlsx_parse_table_handle.py b/apps/common/handle/impl/table/xlsx_parse_table_handle.py index 7b50683fa..c7364169f 100644 --- a/apps/common/handle/impl/table/xlsx_parse_table_handle.py +++ b/apps/common/handle/impl/table/xlsx_parse_table_handle.py @@ -78,7 +78,6 @@ class XlsxSplitHandle(BaseParseTableHandle): return [{'name': file.name, 'paragraphs': []}] return result - def get_content(self, file, save_image): try: # 加载 Excel 文件 diff --git a/apps/common/handle/impl/text/__init__.py b/apps/common/handle/impl/text/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/apps/common/handle/impl/csv_split_handle.py b/apps/common/handle/impl/text/csv_split_handle.py similarity index 100% rename from apps/common/handle/impl/csv_split_handle.py rename to apps/common/handle/impl/text/csv_split_handle.py diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/text/doc_split_handle.py similarity index 99% rename from apps/common/handle/impl/doc_split_handle.py rename to apps/common/handle/impl/text/doc_split_handle.py index 752f726a2..d43462ccb 100644 --- a/apps/common/handle/impl/doc_split_handle.py +++ b/apps/common/handle/impl/text/doc_split_handle.py @@ -10,10 +10,10 @@ import io import os import re import traceback -import uuid_utils.compat as uuid from functools import reduce from typing import List +import uuid_utils.compat as uuid from docx import Document, ImagePart from docx.oxml import ns from docx.table import Table @@ -22,7 +22,6 @@ from docx.text.paragraph import Paragraph from common.handle.base_split_handle import BaseSplitHandle from common.utils.split_model import SplitModel from knowledge.models import File -from django.utils.translation import gettext_lazy as _ default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), re.compile('(?<=\\n)(?