MaxKB/apps/dataset/views/document.py
2025-01-13 18:11:20 +08:00

462 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
"""
@project: maxkb
@Author
@file document.py
@date2023/9/22 11:32
@desc:
"""
from drf_yasg.utils import swagger_auto_schema
from rest_framework.decorators import action
from rest_framework.parsers import MultiPartParser
from rest_framework.views import APIView
from rest_framework.views import Request
from common.auth import TokenAuth, has_permissions
from common.constants.permission_constants import Permission, Group, Operate, CompareConstants
from common.response import result
from common.util.common import query_params_to_single_dict
from dataset.serializers.common_serializers import BatchSerializer
from dataset.serializers.document_serializers import DocumentSerializers, DocumentWebInstanceSerializer
from dataset.swagger_api.document_api import DocumentApi
from django.utils.translation import gettext_lazy as _
class Template(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Get QA template'),
operation_id=_('Get QA template'),
manual_parameters=DocumentSerializers.Export.get_request_params_api(),
tags=[_('Knowledge Base/Documentation')])
def get(self, request: Request):
return DocumentSerializers.Export(data={'type': request.query_params.get('type')}).export(with_valid=True)
class TableTemplate(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Get form template'),
operation_id=_('Get form template'),
manual_parameters=DocumentSerializers.Export.get_request_params_api(),
tags=[_('Knowledge Base/Documentation')])
def get(self, request: Request):
return DocumentSerializers.Export(data={'type': request.query_params.get('type')}).table_export(with_valid=True)
class WebDocument(APIView):
authentication_classes = [TokenAuth]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Create Web site documents'),
operation_id=_('Create Web site documents'),
request_body=DocumentWebInstanceSerializer.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_api_response(DocumentSerializers.Operate.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def post(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Create(data={'dataset_id': dataset_id}).save_web(request.data, with_valid=True))
class QaDocument(APIView):
authentication_classes = [TokenAuth]
parser_classes = [MultiPartParser]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Import QA and create documentation'),
operation_id=_('Import QA and create documentation'),
manual_parameters=DocumentWebInstanceSerializer.get_request_params_api(),
responses=result.get_api_response(DocumentSerializers.Create.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def post(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Create(data={'dataset_id': dataset_id}).save_qa(
{'file_list': request.FILES.getlist('file')},
with_valid=True))
class TableDocument(APIView):
authentication_classes = [TokenAuth]
parser_classes = [MultiPartParser]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Import tables and create documents'),
operation_id=_('Import tables and create documents'),
manual_parameters=DocumentWebInstanceSerializer.get_request_params_api(),
responses=result.get_api_response(DocumentSerializers.Create.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def post(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Create(data={'dataset_id': dataset_id}).save_table(
{'file_list': request.FILES.getlist('file')},
with_valid=True))
class Document(APIView):
authentication_classes = [TokenAuth]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Create document'),
operation_id=_('Create document'),
request_body=DocumentSerializers.Create.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_api_response(DocumentSerializers.Operate.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def post(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Create(data={'dataset_id': dataset_id}).save(request.data, with_valid=True))
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Document list'),
operation_id=_('Document list'),
manual_parameters=DocumentSerializers.Query.get_request_params_api(),
responses=result.get_api_response(DocumentSerializers.Query.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.USE,
dynamic_tag=k.get('dataset_id')))
def get(self, request: Request, dataset_id: str):
d = DocumentSerializers.Query(
data={**query_params_to_single_dict(request.query_params), 'dataset_id': dataset_id})
d.is_valid(raise_exception=True)
return result.success(d.list())
class BatchEditHitHandling(APIView):
authentication_classes = [TokenAuth]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Modify document hit processing methods in batches'),
operation_id=_('Modify document hit processing methods in batches'),
request_body=
DocumentApi.BatchEditHitHandlingApi.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_edit_hit_handling(request.data))
class Batch(APIView):
authentication_classes = [TokenAuth]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Create documents in batches'),
operation_id=_('Create documents in batches'),
request_body=
DocumentSerializers.Batch.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_api_array_response(
DocumentSerializers.Operate.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def post(self, request: Request, dataset_id: str):
return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_save(request.data))
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Batch sync documents'),
operation_id=_('Batch sync documents'),
request_body=
BatchSerializer.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str):
return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_sync(request.data))
@action(methods=['DELETE'], detail=False)
@swagger_auto_schema(operation_summary=_('Delete documents in batches'),
operation_id=_('Delete documents in batches'),
request_body=
BatchSerializer.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def delete(self, request: Request, dataset_id: str):
return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_delete(request.data))
class SyncWeb(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Synchronize web site types'),
operation_id=_('Synchronize web site types'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str, document_id: str):
return result.success(
DocumentSerializers.Sync(data={'document_id': document_id, 'dataset_id': dataset_id}).sync(
))
class CancelTask(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Cancel task'),
operation_id=_('Cancel task'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
request_body=DocumentApi.Cancel.get_request_body_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str, document_id: str):
return result.success(
DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).cancel(
request.data
))
class Batch(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Cancel tasks in batches'),
operation_id=_('Cancel tasks in batches'),
request_body=DocumentApi.BatchCancel.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_cancel(request.data))
class Refresh(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Refresh document vector library'),
operation_id=_('Refresh document vector library'),
request_body=DocumentApi.EmbeddingState.get_request_body_api(),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str, document_id: str):
return result.success(
DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).refresh(
request.data.get('state_list')
))
class BatchRefresh(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Batch refresh document vector library'),
operation_id=_('Batch refresh document vector library'),
request_body=
DocumentApi.BatchEditHitHandlingApi.get_request_body_api(),
manual_parameters=DocumentSerializers.Create.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str):
return result.success(
DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_refresh(request.data))
class Migrate(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Migrate documents in batches'),
operation_id=_('Migrate documents in batches'),
manual_parameters=DocumentSerializers.Migrate.get_request_params_api(),
request_body=DocumentSerializers.Migrate.get_request_body_api(),
responses=result.get_api_response(DocumentSerializers.Operate.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')),
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('target_dataset_id')),
compare=CompareConstants.AND
)
def put(self, request: Request, dataset_id: str, target_dataset_id: str):
return result.success(
DocumentSerializers.Migrate(
data={'dataset_id': dataset_id, 'target_dataset_id': target_dataset_id,
'document_id_list': request.data}).migrate(
))
class Export(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Export document'),
operation_id=_('Export document'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def get(self, request: Request, dataset_id: str, document_id: str):
return DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).export()
class ExportZip(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Export Zip document'),
operation_id=_('Export Zip document'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def get(self, request: Request, dataset_id: str, document_id: str):
return DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).export_zip()
class Operate(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Get document details'),
operation_id=_('Get document details'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
responses=result.get_api_response(DocumentSerializers.Operate.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.USE,
dynamic_tag=k.get('dataset_id')))
def get(self, request: Request, dataset_id: str, document_id: str):
operate = DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id})
operate.is_valid(raise_exception=True)
return result.success(operate.one())
@action(methods=['PUT'], detail=False)
@swagger_auto_schema(operation_summary=_('Modify document'),
operation_id=_('Modify document'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
request_body=DocumentSerializers.Operate.get_request_body_api(),
responses=result.get_api_response(DocumentSerializers.Operate.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str, document_id: str):
return result.success(
DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).edit(
request.data,
with_valid=True))
@action(methods=['DELETE'], detail=False)
@swagger_auto_schema(operation_summary=_('Delete document'),
operation_id=_('Delete document'),
manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
responses=result.get_default_response(),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def delete(self, request: Request, dataset_id: str, document_id: str):
operate = DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id})
operate.is_valid(raise_exception=True)
return result.success(operate.delete())
class SplitPattern(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Get a list of segment IDs'),
operation_id=_('Get a list of segment IDs'),
tags=[_('Knowledge Base/Documentation')])
def get(self, request: Request):
return result.success(DocumentSerializers.SplitPattern.list())
class Split(APIView):
authentication_classes = [TokenAuth]
parser_classes = [MultiPartParser]
@action(methods=['POST'], detail=False)
@swagger_auto_schema(operation_summary=_('Segmented document'),
operation_id=_('Segmented document'),
manual_parameters=DocumentSerializers.Split.get_request_params_api(),
tags=[_('Knowledge Base/Documentation')])
def post(self, request: Request):
split_data = {'file': request.FILES.getlist('file')}
request_data = request.data
if 'patterns' in request.data and request.data.get('patterns') is not None and len(
request.data.get('patterns')) > 0:
split_data.__setitem__('patterns', request_data.getlist('patterns'))
if 'limit' in request.data:
split_data.__setitem__('limit', request_data.get('limit'))
if 'with_filter' in request.data:
split_data.__setitem__('with_filter', request_data.get('with_filter'))
ds = DocumentSerializers.Split(
data=split_data)
ds.is_valid(raise_exception=True)
return result.success(ds.parse())
class Page(APIView):
authentication_classes = [TokenAuth]
@action(methods=['GET'], detail=False)
@swagger_auto_schema(operation_summary=_('Get the knowledge base paginated list'),
operation_id=_('Get the knowledge base paginated list'),
manual_parameters=DocumentSerializers.Query.get_request_params_api(),
responses=result.get_page_api_response(DocumentSerializers.Query.get_response_body_api()),
tags=[_('Knowledge Base/Documentation')])
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.USE,
dynamic_tag=k.get('dataset_id')))
def get(self, request: Request, dataset_id: str, current_page, page_size):
d = DocumentSerializers.Query(
data={**query_params_to_single_dict(request.query_params), 'dataset_id': dataset_id})
d.is_valid(raise_exception=True)
return result.success(d.page(current_page, page_size))
class BatchGenerateRelated(APIView):
authentication_classes = [TokenAuth]
@action(methods=['PUT'], detail=False)
@has_permissions(
lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
dynamic_tag=k.get('dataset_id')))
def put(self, request: Request, dataset_id: str):
return result.success(DocumentSerializers.BatchGenerateRelated(data={'dataset_id': dataset_id})
.batch_generate_related(request.data))