From efd273b3bc43f7d77d70e3418d3f1e2c9c01166e Mon Sep 17 00:00:00 2001
From: CaptainB
Date: Tue, 3 Jun 2025 17:44:02 +0800
Subject: [PATCH] feat: add export and export_zip endpoints for knowledge base and document with image handling

---
 apps/common/utils/common.py             |   8 +-
 apps/knowledge/api/document.py          |  67 ++++++++-
 apps/knowledge/api/knowledge.py         |  24 +++
 apps/knowledge/serializers/document.py  | 188 +++++++++++++++++++++++-
 apps/knowledge/serializers/knowledge.py |  84 +++++++++-
 apps/knowledge/urls.py                  |   6 +
 apps/knowledge/views/document.py        |  62 +++++++-
 apps/knowledge/views/knowledge.py       |  34 +++-
 8 files changed, 462 insertions(+), 11 deletions(-)

diff --git a/apps/common/utils/common.py b/apps/common/utils/common.py
index 7f4d88650..d3549cea9 100644
--- a/apps/common/utils/common.py
+++ b/apps/common/utils/common.py
@@ -318,4 +318,10 @@ def flat_map(array: List[List]):
     result = []
     for e in array:
         result += e
-    return result
\ No newline at end of file
+    return result
+
+def parse_image(content: str):
+    """Return every markdown image tag in content whose URL starts with /api/image/ or /api/file/."""
+    matches = re.finditer(r"!\[.*?\]\(\/api\/(image|file)\/.*?\)", content)
+    return [match.group() for match in matches]
+
diff --git a/apps/knowledge/api/document.py b/apps/knowledge/api/document.py
index fbed798b3..2d95f8a93 100644
--- a/apps/knowledge/api/document.py
+++ b/apps/knowledge/api/document.py
@@ -383,6 +383,7 @@ class BatchRefreshAPI(APIMixin):
     def get_request():
         return DocumentBatchRefreshSerializer
 
+
 class BatchGenerateRelatedAPI(APIMixin):
     @staticmethod
     def get_parameters():
@@ -405,4 +406,68 @@ class BatchGenerateRelatedAPI(APIMixin):
 
     @staticmethod
     def get_request():
-        return DocumentBatchGenerateRelatedSerializer
\ No newline at end of file
+        return DocumentBatchGenerateRelatedSerializer
+
+
+class TemplateExportAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="type",
+                description="Export template type csv|excel",
+                type=OpenApiTypes.STR,
+                location='query',
+                required=True,
+            ),
+        ]
+
+    @staticmethod
+    def get_response():
+        return DefaultResultSerializer
+
+
+class DocumentExportAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="document_id",
+                description="文档id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+        ]
+
+    @staticmethod
+    def get_response():
+        return DefaultResultSerializer
diff --git a/apps/knowledge/api/knowledge.py b/apps/knowledge/api/knowledge.py
index d194e9690..fbeb62a87 100644
--- a/apps/knowledge/api/knowledge.py
+++ b/apps/knowledge/api/knowledge.py
@@ -259,3 +259,27 @@ class GetModelAPI(SyncWebAPI):
     @staticmethod
     def get_response():
         return DefaultResultSerializer
+
+class KnowledgeExportAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+        ]
+
+    @staticmethod
+    def get_response():
+        return DefaultResultSerializer
\ No newline at end of file
diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py
index 35e84c5e2..7e407de74 100644
--- a/apps/knowledge/serializers/document.py
+++ b/apps/knowledge/serializers/document.py
@@ -1,18 +1,24 @@
+import io
 import logging
 import os
 import re
 import traceback
 from functools import reduce
+from tempfile import TemporaryDirectory
 from typing import Dict, List
 
+import openpyxl
 import uuid_utils.compat as uuid
 from celery_once import AlreadyQueued
 from django.core import validators
 from django.db import transaction, models
 from django.db.models import QuerySet, Model
 from django.db.models.functions import Substr, Reverse
-from django.utils.translation import gettext_lazy as _
+from django.http import HttpResponse
+from django.utils.translation import gettext_lazy as _, gettext, get_language, to_locale
+from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
 from rest_framework import serializers
+from xlwt import Utils
 
 from common.db.search import native_search, get_dynamics_model, native_page_search
 from common.event import ListenerManagement
@@ -33,13 +39,13 @@ from common.handle.impl.text.text_split_handle import TextSplitHandle
 from common.handle.impl.text.xls_split_handle import XlsSplitHandle
 from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle
 from common.handle.impl.text.zip_split_handle import ZipSplitHandle
-from common.utils.common import post, get_file_content, bulk_create_in_batches
+from common.utils.common import post, get_file_content, bulk_create_in_batches, parse_image
 from common.utils.fork import Fork
 from common.utils.split_model import get_split_model, flat_map
 from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \
     TaskType, File
 from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, \
-    get_embedding_model_id_by_knowledge_id, MetaSerializer
+    get_embedding_model_id_by_knowledge_id, MetaSerializer, write_image, zip_dir
 from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \
     delete_problems_and_mappings
 from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \
@@ -180,11 +186,66 @@ class BatchEditHitHandlingSerializer(serializers.Serializer):
 
 
 class DocumentSerializers(serializers.Serializer):
+    class Export(serializers.Serializer):
+        type = serializers.CharField(required=True, validators=[
+            validators.RegexValidator(regex=re.compile("^(csv|excel)$"),  # grouped: anchors cover both options
+                                      message=_('The template type only supports excel|csv'),
+                                      code=500)
+        ], label=_('type'))
+
+        def export(self, with_valid=True):
+            """Return the localized QA template (csv or excel) as a download; None for any other type."""
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            language = get_language()
+            if self.data.get('type') == 'csv':
+                template_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'template',
+                                             f'csv_template_{to_locale(language)}.csv')
+                # The context manager closes the template even if read() raises.
+                with open(template_path, "rb") as file:
+                    content = file.read()
+                return HttpResponse(content, status=200, headers={'Content-Type': 'text/csv',
+                                                                   'Content-Disposition': 'attachment; filename="csv_template.csv"'})
+            elif self.data.get('type') == 'excel':
+                template_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'template',
+                                             f'excel_template_{to_locale(language)}.xlsx')
+                with open(template_path, "rb") as file:
+                    content = file.read()
+                return HttpResponse(content, status=200, headers={'Content-Type': 'application/vnd.ms-excel',
+                                                                   'Content-Disposition': 'attachment; filename="excel_template.xlsx"'})
+            return None
+
+        def table_export(self, with_valid=True):
+            """Return the localized table template (csv or excel) as a download; None for any other type."""
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            language = get_language()
+            if self.data.get('type') == 'csv':
+                template_path = os.path.join(
+                    PROJECT_DIR, "apps", "knowledge", 'template',
+                    f'table_template_{to_locale(language)}.csv')
+                # The context manager closes the template even if read() raises.
+                with open(template_path, "rb") as file:
+                    content = file.read()
+                return HttpResponse(content, status=200, headers={'Content-Type': 'text/csv',
+                                                                   'Content-Disposition': 'attachment; filename="csv_template.csv"'})
+            elif self.data.get('type') == 'excel':
+                template_path = os.path.join(
+                    PROJECT_DIR, "apps", "knowledge", 'template',
+                    f'table_template_{to_locale(language)}.xlsx')
+                with open(template_path, "rb") as file:
+                    content = file.read()
+                return HttpResponse(content, status=200, headers={'Content-Type': 'application/vnd.ms-excel',
+                                                                   'Content-Disposition': 'attachment; filename="excel_template.xlsx"'})
+            return None
+
+
     class Query(serializers.Serializer):
         # 知识库id
         workspace_id = serializers.CharField(required=True, label=_('workspace id'))
         knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
-        name = serializers.CharField(required=False, max_length=128, min_length=1, allow_null=True, allow_blank=True, label=_('document name'))
+        name = serializers.CharField(required=False, max_length=128, min_length=1, allow_null=True, allow_blank=True,
+                                     label=_('document name'))
         hit_handling_method = serializers.CharField(required=False, label=_('hit handling method'))
         is_active = serializers.BooleanField(required=False, label=_('document is active'))
         task_type = serializers.IntegerField(required=False, label=_('task type'))
@@ -339,6 +400,53 @@ class DocumentSerializers(serializers.Serializer):
             if not QuerySet(Document).filter(id=document_id).exists():
                 raise AppApiException(500, _('document id not exist'))
 
+        def export(self, with_valid=True):
+            """Export this document's paragraphs and their related questions as an xlsx download."""
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            document = QuerySet(Document).filter(id=self.data.get("document_id")).first()
+            paragraph_list = native_search(
+                QuerySet(Paragraph).filter(document_id=self.data.get("document_id")), get_file_content(
+                    os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph_document_name.sql')))
+            problem_mapping_list = native_search(
+                QuerySet(ProblemParagraphMapping).filter(document_id=self.data.get("document_id")), get_file_content(
+                    os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem_mapping.sql')),
+                with_table_name=True)
+            data_dict, document_dict = self.merge_problem(paragraph_list, problem_mapping_list, [document])
+            workbook = self.get_workbook(data_dict, document_dict)
+            response = HttpResponse(content_type='application/vnd.ms-excel')
+            response['Content-Disposition'] = 'attachment; filename="data.xlsx"'
+            workbook.save(response)
+            return response
+
+        def export_zip(self, with_valid=True):
+            """Export this document as a zip: the xlsx workbook plus whatever write_image stores for its image links."""
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            document = QuerySet(Document).filter(id=self.data.get("document_id")).first()
+            paragraph_list = native_search(
+                QuerySet(Paragraph).filter(document_id=self.data.get("document_id")), get_file_content(
+                    os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph_document_name.sql')))
+            problem_mapping_list = native_search(
+                QuerySet(ProblemParagraphMapping).filter(document_id=self.data.get("document_id")), get_file_content(
+                    os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem_mapping.sql')),
+                with_table_name=True)
+            data_dict, document_dict = self.merge_problem(paragraph_list, problem_mapping_list, [document])
+            res = [parse_image(paragraph.get('content')) for paragraph in paragraph_list]
+
+            workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
+            response = HttpResponse(content_type='application/zip')
+            response['Content-Disposition'] = 'attachment; filename="archive.zip"'
+            zip_buffer = io.BytesIO()
+            with TemporaryDirectory() as tempdir:
+                knowledge_file = os.path.join(tempdir, 'knowledge.xlsx')
+                workbook.save(knowledge_file)
+                for r in res:
+                    write_image(tempdir, r)
+                zip_dir(tempdir, zip_buffer)
+            response.write(zip_buffer.getvalue())
+            return response
+
         def one(self, with_valid=False):
             if with_valid:
                 self.is_valid(raise_exception=True)
@@ -441,6 +549,78 @@ class DocumentSerializers(serializers.Serializer):
             except AlreadyQueued as e:
                 raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.'))
 
+        @staticmethod
+        def get_workbook(data_dict, document_dict):
+            """Build a workbook with one sheet per key of data_dict, each titled via document_dict."""
+            workbook = openpyxl.Workbook()
+            workbook.remove(workbook.active)
+            if not data_dict:
+                data_dict['sheet'] = []  # nothing to export: still emit one header-only sheet
+            for sheet_id in data_dict:
+                # Add the worksheet
+                worksheet = workbook.create_sheet(document_dict.get(sheet_id))
+                data = [
+                    [gettext('Section title (optional)'),
+                     gettext('Section content (required, question answer, no more than 4096 characters)'),
+                     gettext('Question (optional, one per line in the cell)')],
+                    *data_dict.get(sheet_id, [])
+                ]
+                # Write the rows into the worksheet
+                for row_idx, row in enumerate(data):
+                    for col_idx, col in enumerate(row):
+                        cell = worksheet.cell(row=row_idx + 1, column=col_idx + 1)
+                        if isinstance(col, str):
+                            col = re.sub(ILLEGAL_CHARACTERS_RE, '', col)
+                            if col.startswith(('=', '+', '-', '@')):  # presumably guards against formula cells
+                                col = '\ufeff' + col
+                        cell.value = col
+            # Only the workbook is built here; callers decide where to save it.
+            return workbook
+
+        @staticmethod
+        def merge_problem(paragraph_list: List[Dict], problem_mapping_list: List[Dict], document_list):
+            """Group export rows by document and pick a sheet name for every document.
+
+            Each row is [title, content, newline-joined questions]. Returns a tuple of
+            (rows keyed by document id, sheet name keyed by document id); documents that
+            share a sheet name are told apart by a numeric suffix.
+            """
+            result = {}
+            document_dict = {}
+            # Index question texts by paragraph id once instead of rescanning all mappings per paragraph.
+            problems_by_paragraph = {}
+            for problem_mapping in problem_mapping_list:
+                problems_by_paragraph.setdefault(problem_mapping.get('paragraph_id'), []).append(
+                    problem_mapping.get('content'))
+
+            for paragraph in paragraph_list:
+                document_id = paragraph.get('document_id')
+                document_name = DocumentSerializers.Operate.reset_document_name(paragraph.get('document_name'))
+                document_dict.setdefault(document_name, set()).add(document_id)
+                problem_list = problems_by_paragraph.get(paragraph.get('id'), [])
+                row = [paragraph.get('title'), paragraph.get('content'), '\n'.join(problem_list)]
+                result.setdefault(document_id, []).append(row)
+            # Documents without paragraphs still get a sheet, holding a single empty row.
+            for document in document_list:
+                if document.id not in result:
+                    document_name = DocumentSerializers.Operate.reset_document_name(document.name)
+                    result[document.id] = [[]]
+                    document_dict.setdefault(document_name, set()).add(document.id)
+            # Documents sharing a sheet name: one keeps the plain name, the others get an index suffix.
+            result_document_dict = {}
+            for d_name in document_dict:
+                for index, d_id in enumerate(document_dict.get(d_name)):
+                    result_document_dict[d_id] = d_name if index == 0 else d_name + str(index)
+            return result, result_document_dict
+
+        @staticmethod
+        def reset_document_name(document_name):
+            """Return a sheet-safe name: trimmed to 29 characters, or "Sheet" when missing or invalid."""
+            if document_name is None:
+                return "Sheet"
+            document_name = document_name.strip()[0:29]
+            return document_name.strip() if Utils.valid_sheet_name(document_name) else "Sheet"
+
     class Create(serializers.Serializer):
         workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
         knowledge_id = serializers.UUIDField(required=True, label=_('document id'))
diff --git a/apps/knowledge/serializers/knowledge.py b/apps/knowledge/serializers/knowledge.py
index 5190d5e99..5b2d0a170 100644
--- a/apps/knowledge/serializers/knowledge.py
+++ b/apps/knowledge/serializers/knowledge.py
@@ -1,9 +1,11 @@
+import io
 import logging
 import os
 import re
 import traceback
 from functools import reduce
-from typing import Dict
+from tempfile import TemporaryDirectory
+from typing import Dict, List
 
 import uuid_utils.compat as uuid
 from celery_once import AlreadyQueued
@@ -11,6 +13,7 @@ from django.core import validators
 from django.db import transaction, models
 from django.db.models import QuerySet
 from django.db.models.functions import Reverse, Substr
+from django.http import HttpResponse
 from django.utils.translation import gettext_lazy as _
 from rest_framework import serializers
 
@@ -20,13 +23,13 @@ from common.db.search import native_search, get_dynamics_model, native_page_sear
 from common.db.sql_execute import select_list
 from common.event import ListenerManagement
 from common.exception.app_exception import AppApiException
-from common.utils.common import valid_license, post, get_file_content
+from common.utils.common import valid_license, post, get_file_content, parse_image
 from common.utils.fork import Fork, ChildLink
 from common.utils.split_model import get_split_model
 from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \
     ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder
 from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
-    GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph
+    GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph, write_image, zip_dir
 from knowledge.serializers.document import DocumentSerializers
 from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
 from knowledge.task.generate import generate_related_by_knowledge_id
@@ -330,6 +333,81 @@ class KnowledgeSerializer(serializers.Serializer):
             knowledge.delete()
             delete_embedding_by_knowledge(self.data.get('knowledge_id'))
             return True
+
+        def export_excel(self, with_valid=True):
+            """Export every document of this knowledge base into one xlsx download (a sheet per document)."""
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            knowledge_id = self.data.get('knowledge_id')
+            document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
+            paragraph_list = native_search(QuerySet(Paragraph).filter(knowledge_id=knowledge_id), get_file_content(
+                os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph_document_name.sql')))
+            problem_mapping_list = native_search(
+                QuerySet(ProblemParagraphMapping).filter(knowledge_id=knowledge_id), get_file_content(
+                    os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem_mapping.sql')),
+                with_table_name=True)
+            data_dict, document_dict = DocumentSerializers.Operate.merge_problem(paragraph_list, problem_mapping_list,
+                                                                                 document_list)
+            workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
+            response = HttpResponse(content_type='application/vnd.ms-excel')
+            response['Content-Disposition'] = 'attachment; filename="knowledge.xlsx"'
+            workbook.save(response)
+            return response
+
+        def export_zip(self, with_valid=True):
+            """Export the knowledge base as a zip: the workbook plus whatever write_image stores for its image links."""
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            knowledge_id = self.data.get('knowledge_id')
+            document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
+            paragraph_list = native_search(QuerySet(Paragraph).filter(knowledge_id=knowledge_id), get_file_content(
+                os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph_document_name.sql')))
+            problem_mapping_list = native_search(
+                QuerySet(ProblemParagraphMapping).filter(knowledge_id=knowledge_id), get_file_content(
+                    os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem_mapping.sql')),
+                with_table_name=True)
+            data_dict, document_dict = DocumentSerializers.Operate.merge_problem(paragraph_list, problem_mapping_list,
+                                                                                 document_list)
+            res = [parse_image(paragraph.get('content')) for paragraph in paragraph_list]
+
+            workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
+            response = HttpResponse(content_type='application/zip')
+            response['Content-Disposition'] = 'attachment; filename="archive.zip"'
+            zip_buffer = io.BytesIO()
+            with TemporaryDirectory() as tempdir:
+                knowledge_file = os.path.join(tempdir, 'knowledge.xlsx')
+                workbook.save(knowledge_file)
+                for r in res:
+                    write_image(tempdir, r)
+                zip_dir(tempdir, zip_buffer)
+            response.write(zip_buffer.getvalue())
+            return response
+
+        @staticmethod
+        def merge_problem(paragraph_list: List[Dict], problem_mapping_list: List[Dict]):
+            """Group export rows by document id and map every document id to a sheet name.
+
+            Unlike DocumentSerializers.Operate.merge_problem (which export_excel/export_zip use),
+            document names are taken as-is and documents without paragraphs are not included.
+            """
+            result = {}
+            document_dict = {}
+            # Index question texts by paragraph id once instead of rescanning all mappings per paragraph.
+            problems_by_paragraph = {}
+            for problem_mapping in problem_mapping_list:
+                problems_by_paragraph.setdefault(problem_mapping.get('paragraph_id'), []).append(
+                    problem_mapping.get('content'))
+            for paragraph in paragraph_list:
+                document_id = paragraph.get('document_id')
+                document_dict.setdefault(paragraph.get('document_name'), set()).add(document_id)
+                problem_list = problems_by_paragraph.get(paragraph.get('id'), [])
+                row = [paragraph.get('title'), paragraph.get('content'), '\n'.join(problem_list)]
+                result.setdefault(document_id, []).append(row)
+            result_document_dict = {}
+            for d_name in document_dict:
+                for index, d_id in enumerate(document_dict.get(d_name)):
+                    result_document_dict[d_id] = d_name if index == 0 else d_name + str(index)
+            return result, result_document_dict
 
     class Create(serializers.Serializer):
         user_id = serializers.UUIDField(required=True, label=_('user id'))
diff --git a/apps/knowledge/urls.py b/apps/knowledge/urls.py
index 564c32aca..6ba92b6b3 100644
--- a/apps/knowledge/urls.py
+++ b/apps/knowledge/urls.py
@@ -14,6 +14,8 @@ urlpatterns = [
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/generate_related', views.KnowledgeView.GenerateRelated.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/embedding', views.KnowledgeView.Embedding.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/hit_test', views.KnowledgeView.HitTest.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/export', views.KnowledgeView.Export.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/export_zip', views.KnowledgeView.ExportZip.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()),
@@ -26,11 +28,15 @@ urlpatterns = [
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table', views.TableDocumentView.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_hit_handling', views.DocumentView.BatchEditHitHandling.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/template/export', views.Template.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table_template/export', views.TableTemplate.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>', views.DocumentView.Operate.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/sync', views.DocumentView.SyncWeb.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/batch_cancel_task', views.DocumentView.BatchCancelTask.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/export', views.DocumentView.Export.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/export_zip', views.DocumentView.ExportZip.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_delete', views.ParagraphView.BatchDelete.as_view()),
     path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_generate_related', views.ParagraphView.BatchGenerateRelated.as_view()),
diff --git a/apps/knowledge/views/document.py b/apps/knowledge/views/document.py
index 69b2fb94c..0ac92fef9 100644
--- a/apps/knowledge/views/document.py
+++ b/apps/knowledge/views/document.py
@@ -11,7 +11,8 @@ from common.result import result
 from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
     DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \
     WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \
-    DocumentTreeReadAPI, DocumentSplitPatternAPI, BatchRefreshAPI, BatchGenerateRelatedAPI
+    DocumentTreeReadAPI, DocumentSplitPatternAPI, BatchRefreshAPI, BatchGenerateRelatedAPI, TemplateExportAPI, \
+    DocumentExportAPI
 from knowledge.serializers.document import DocumentSerializers
 
 
@@ -384,6 +385,38 @@ class DocumentView(APIView):
                 }
             ).page(current_page, page_size))
 
+    class Export(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            summary=_('Export document'),
+            operation_id=_('Export document'),  # type: ignore
+            parameters=DocumentExportAPI.get_parameters(),
+            responses=DocumentExportAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation')]  # type: ignore
+        )
+        @has_permissions(PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission())
+        def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
+            return DocumentSerializers.Operate(
+                data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id}
+            ).export()
+
+    class ExportZip(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            summary=_('Export Zip document'),
+            operation_id=_('Export Zip document'),  # type: ignore
+            parameters=DocumentExportAPI.get_parameters(),
+            responses=DocumentExportAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation')]  # type: ignore
+        )
+        @has_permissions(PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission())
+        def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
+            return DocumentSerializers.Operate(
+                data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id}
+            ).export_zip()
+
 
 class WebDocumentView(APIView):
     authentication_classes = [TokenAuth]
@@ -443,3 +476,30 @@ class TableDocumentView(APIView):
         return result.success(DocumentSerializers.Create(
             data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
         ).save_table({'file_list': request.FILES.getlist('file')}, with_valid=True))
+
+
+class Template(APIView):
+    authentication_classes = [TokenAuth]
+
+    @extend_schema(
+        summary=_('Get QA template'),
+        operation_id=_('Get QA template'),  # type: ignore
+        parameters=TemplateExportAPI.get_parameters(),
+        responses=TemplateExportAPI.get_response(),
+        tags=[_('Knowledge Base/Documentation')]  # type: ignore
+    )
+    def get(self, request: Request, workspace_id: str, knowledge_id: str):
+        return DocumentSerializers.Export(data={'type': request.query_params.get('type')}).export(with_valid=True)
+
+
+class TableTemplate(APIView):
+    authentication_classes = [TokenAuth]
+
+    @extend_schema(
+        summary=_('Get form template'),
+        operation_id=_('Get form template'),  # type: ignore
+        parameters=TemplateExportAPI.get_parameters(),
+        responses=TemplateExportAPI.get_response(),
+        tags=[_('Knowledge Base/Documentation')])  # type: ignore
+    def get(self, request: Request, workspace_id: str, knowledge_id: str):
+        return DocumentSerializers.Export(data={'type': request.query_params.get('type')}).table_export(with_valid=True)
diff --git a/apps/knowledge/views/knowledge.py b/apps/knowledge/views/knowledge.py
index 8b60294d7..b7ddfa329 100644
--- a/apps/knowledge/views/knowledge.py
+++ b/apps/knowledge/views/knowledge.py
@@ -9,7 +9,7 @@ from common.constants.permission_constants import PermissionConstants
 from common.result import result
 from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI, \
     KnowledgeEditAPI, KnowledgeReadAPI, KnowledgePageAPI, SyncWebAPI, GenerateRelatedAPI, HitTestAPI, EmbeddingAPI, \
-    GetModelAPI
+    GetModelAPI, KnowledgeExportAPI
 from knowledge.serializers.knowledge import KnowledgeSerializer
 from models_provider.serializers.model_serializer import ModelSerializer
 
@@ -182,6 +182,38 @@ class KnowledgeView(APIView):
                 data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
             ).embedding())
 
+    class Export(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            summary=_('Export knowledge base'),
+            operation_id=_('Export knowledge base'),  # type: ignore
+            parameters=KnowledgeExportAPI.get_parameters(),
+            responses=KnowledgeExportAPI.get_response(),
+            tags=[_('Knowledge Base')]  # type: ignore
+        )
+        @has_permissions(PermissionConstants.KNOWLEDGE_EXPORT.get_workspace_permission())
+        def get(self, request: Request, workspace_id: str, knowledge_id: str):
+            return KnowledgeSerializer.Operate(
+                data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
+            ).export_excel()
+
+    class ExportZip(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            summary=_('Export knowledge base containing images'),
+            operation_id=_('Export knowledge base containing images'),  # type: ignore
+            parameters=KnowledgeExportAPI.get_parameters(),
+            responses=KnowledgeExportAPI.get_response(),
+            tags=[_('Knowledge Base')]  # type: ignore
+        )
+        @has_permissions(PermissionConstants.KNOWLEDGE_EXPORT.get_workspace_permission())
+        def get(self, request: Request, workspace_id: str, knowledge_id: str):
+            return KnowledgeSerializer.Operate(
+                data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
+            ).export_zip()
+
     class GenerateRelated(APIView):
         authentication_classes = [TokenAuth]
 