From 43bef216d51030047323ee2e1ea600f3bc9ac6b4 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Wed, 30 Apr 2025 16:06:30 +0800 Subject: [PATCH] refactor: reorganize file handling imports into a structured directory --- .../common/handle/impl/qa/zip_parse_qa_handle.py | 5 ++--- .../handle/impl/table/csv_parse_table_handle.py | 4 ++-- .../handle/impl/table/xlsx_parse_table_handle.py | 1 - apps/common/handle/impl/text/__init__.py | 0 .../handle/impl/{ => text}/csv_split_handle.py | 0 .../handle/impl/{ => text}/doc_split_handle.py | 3 +-- .../handle/impl/{ => text}/html_split_handle.py | 2 +- .../handle/impl/{ => text}/pdf_split_handle.py | 8 +++++--- .../handle/impl/{ => text}/text_split_handle.py | 4 ++-- .../handle/impl/{ => text}/xls_split_handle.py | 0 .../handle/impl/{ => text}/xlsx_split_handle.py | 0 .../handle/impl/{ => text}/zip_split_handle.py | 14 +++++++------- apps/knowledge/serializers/document.py | 16 ++++++++-------- 13 files changed, 28 insertions(+), 29 deletions(-) create mode 100644 apps/common/handle/impl/text/__init__.py rename apps/common/handle/impl/{ => text}/csv_split_handle.py (100%) rename apps/common/handle/impl/{ => text}/doc_split_handle.py (99%) rename apps/common/handle/impl/{ => text}/html_split_handle.py (99%) rename apps/common/handle/impl/{ => text}/pdf_split_handle.py (98%) rename apps/common/handle/impl/{ => text}/text_split_handle.py (96%) rename apps/common/handle/impl/{ => text}/xls_split_handle.py (100%) rename apps/common/handle/impl/{ => text}/xlsx_split_handle.py (100%) rename apps/common/handle/impl/{ => text}/zip_split_handle.py (92%) diff --git a/apps/common/handle/impl/qa/zip_parse_qa_handle.py b/apps/common/handle/impl/qa/zip_parse_qa_handle.py index d00bc14dd..af44b809c 100644 --- a/apps/common/handle/impl/qa/zip_parse_qa_handle.py +++ b/apps/common/handle/impl/qa/zip_parse_qa_handle.py @@ -9,12 +9,12 @@ import io import os import re -import uuid_utils.compat as uuid import zipfile from typing import List from urllib.parse import urljoin -from django.db.models import QuerySet +import uuid_utils.compat as uuid +from django.utils.translation import gettext_lazy as _ from common.handle.base_parse_qa_handle import BaseParseQAHandle from common.handle.impl.qa.csv_parse_qa_handle import CsvParseQAHandle @@ -22,7 +22,6 @@ from common.handle.impl.qa.xls_parse_qa_handle import XlsParseQAHandle from common.handle.impl.qa.xlsx_parse_qa_handle import XlsxParseQAHandle from common.utils.common import parse_md_image from knowledge.models import File -from django.utils.translation import gettext_lazy as _ class FileBufferHandle: diff --git a/apps/common/handle/impl/table/csv_parse_table_handle.py b/apps/common/handle/impl/table/csv_parse_table_handle.py index e2fc7ce86..4971c424f 100644 --- a/apps/common/handle/impl/table/csv_parse_table_handle.py +++ b/apps/common/handle/impl/table/csv_parse_table_handle.py @@ -15,7 +15,7 @@ class CsvSplitHandle(BaseParseTableHandle): return True return False - def handle(self, file, get_buffer,save_image): + def handle(self, file, get_buffer, save_image): buffer = get_buffer(file) try: content = buffer.decode(detect(buffer)['encoding']) @@ -41,4 +41,4 @@ class CsvSplitHandle(BaseParseTableHandle): return buffer.decode(detect(buffer)['encoding']) except BaseException as e: max_kb.error(f'csv split handle error: {e}') - return f'error: {e}' \ No newline at end of file + return f'error: {e}' diff --git a/apps/common/handle/impl/table/xlsx_parse_table_handle.py b/apps/common/handle/impl/table/xlsx_parse_table_handle.py index 7b50683fa..c7364169f 100644 --- a/apps/common/handle/impl/table/xlsx_parse_table_handle.py +++ b/apps/common/handle/impl/table/xlsx_parse_table_handle.py @@ -78,7 +78,6 @@ class XlsxSplitHandle(BaseParseTableHandle): return [{'name': file.name, 'paragraphs': []}] return result - def get_content(self, file, save_image): try: # 加载 Excel 文件 diff --git a/apps/common/handle/impl/text/__init__.py b/apps/common/handle/impl/text/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/apps/common/handle/impl/csv_split_handle.py b/apps/common/handle/impl/text/csv_split_handle.py similarity index 100% rename from apps/common/handle/impl/csv_split_handle.py rename to apps/common/handle/impl/text/csv_split_handle.py diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/text/doc_split_handle.py similarity index 99% rename from apps/common/handle/impl/doc_split_handle.py rename to apps/common/handle/impl/text/doc_split_handle.py index 752f726a2..d43462ccb 100644 --- a/apps/common/handle/impl/doc_split_handle.py +++ b/apps/common/handle/impl/text/doc_split_handle.py @@ -10,10 +10,10 @@ import io import os import re import traceback -import uuid_utils.compat as uuid from functools import reduce from typing import List +import uuid_utils.compat as uuid from docx import Document, ImagePart from docx.oxml import ns from docx.table import Table @@ -22,7 +22,6 @@ from docx.text.paragraph import Paragraph from common.handle.base_split_handle import BaseSplitHandle from common.utils.split_model import SplitModel from knowledge.models import File -from django.utils.translation import gettext_lazy as _ default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), re.compile('(?<=\\n)(?