feat: implement Paragraph API for CRUD operations and batch deletion

This commit is contained in:
CaptainB 2025-05-07 12:02:51 +08:00
parent bcc7c1acf1
commit 10105ce5ab
5 changed files with 548 additions and 2 deletions

View File

@ -0,0 +1,208 @@
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import OpenApiParameter
from common.mixins.api_mixin import APIMixin
from common.result import DefaultResultSerializer, ResultSerializer
from knowledge.serializers.common import BatchSerializer
from knowledge.serializers.paragraph import ParagraphSerializer
from knowledge.serializers.problem import ProblemSerializer
class ParagraphReadResponse(ResultSerializer):
    # Result serializer whose data payload is described as a list of paragraphs.
    # (A comment is used instead of a class docstring because serializer
    # docstrings can be picked up as schema descriptions.)

    @staticmethod
    def get_data():
        """Return the serializer describing the payload: many ``ParagraphSerializer`` items."""
        paragraph_list_serializer = ParagraphSerializer(many=True)
        return paragraph_list_serializer
class ParagraphReadAPI(APIMixin):
    """OpenAPI parameters and response for the paragraph-list operation."""

    @staticmethod
    def get_parameters():
        """Build the OpenAPI parameters of the paragraph-list operation.

        Returns:
            A list of ``OpenApiParameter``: three required string path
            parameters followed by two optional string query parameters.
        """
        # One row per parameter: (name, description, location, required).
        # Every parameter is typed as a string.
        parameter_specs = (
            ("workspace_id", "工作空间id", 'path', True),
            ("knowledge_id", "知识库id", 'path', True),
            ("document_id", "文档id", 'path', True),
            ("title", "标题", 'query', False),
            ("content", "内容", 'query', False),
        )
        return [
            OpenApiParameter(
                name=name,
                description=description,
                type=OpenApiTypes.STR,
                location=location,
                required=required,
            )
            for name, description, location, required in parameter_specs
        ]

    @staticmethod
    def get_response():
        """Return the response serializer class for the paragraph list."""
        return ParagraphReadResponse
class ParagraphCreateAPI(APIMixin):
    """OpenAPI parameters, request body and response for creating a paragraph."""

    @staticmethod
    def get_parameters():
        """Build the three required string path parameters of the operation.

        Returns:
            A list of ``OpenApiParameter`` for ``workspace_id``,
            ``knowledge_id`` and ``document_id``, in that order.
        """
        # One row per path parameter: (name, description).
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
        )
        return [
            OpenApiParameter(
                name=name,
                description=description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for name, description in path_specs
        ]

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return ParagraphSerializer

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return ParagraphReadResponse
class ParagraphBatchDeleteAPI(ParagraphCreateAPI):
    """OpenAPI description for batch deletion of paragraphs.

    The path parameters are inherited from ``ParagraphCreateAPI``; only the
    request body and the response differ.
    """

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return DefaultResultSerializer

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return BatchSerializer
class ParagraphGetAPI(APIMixin):
    """OpenAPI path parameters for operations that address a single paragraph."""

    @staticmethod
    def get_parameters():
        """Build the four required string path parameters of the operation.

        Returns:
            A list of ``OpenApiParameter`` for ``workspace_id``,
            ``knowledge_id``, ``document_id`` and ``paragraph_id``, in that order.
        """
        # One row per path parameter: (name, description).
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
            ("paragraph_id", "段落id"),
        )
        return [
            OpenApiParameter(
                name=name,
                description=description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for name, description in path_specs
        ]
class ParagraphEditAPI(ParagraphGetAPI):
    """OpenAPI description for modifying a paragraph.

    The path parameters are inherited from ``ParagraphGetAPI``.
    """

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return DefaultResultSerializer

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return ParagraphSerializer
class ProblemCreateAPI(ParagraphGetAPI):
    """OpenAPI description for adding a problem to a paragraph.

    The path parameters are inherited from ``ParagraphGetAPI``.
    """

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return DefaultResultSerializer

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return ProblemSerializer
class UnAssociationAPI(APIMixin):
    """OpenAPI path parameters for operations on a problem/paragraph pair."""

    @staticmethod
    def get_parameters():
        """Build the five required string path parameters of the operation.

        Returns:
            A list of ``OpenApiParameter`` for ``workspace_id``,
            ``knowledge_id``, ``document_id``, ``paragraph_id`` and
            ``problem_id``, in that order.
        """
        # One row per path parameter: (name, description).
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
            ("paragraph_id", "段落id"),
            ("problem_id", "问题id"),
        )
        return [
            OpenApiParameter(
                name=name,
                description=description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for name, description in path_specs
        ]
class AssociationAPI(UnAssociationAPI):
    """OpenAPI description for the association operation.

    Adds nothing of its own: everything is inherited from ``UnAssociationAPI``.
    """

View File

@ -8,15 +8,17 @@ from django.db.models import QuerySet, Count
from django.utils.translation import gettext_lazy as _
from rest_framework import serializers
from common.db.search import page_search
from common.exception.app_exception import AppApiException
from common.utils.common import post
from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \
get_embedding_model_id_by_knowledge_id, update_document_char_length
get_embedding_model_id_by_knowledge_id, update_document_char_length, BatchSerializer
from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers
from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \
disable_embedding_by_paragraph, \
delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task
delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task, delete_embedding_by_paragraph_ids, \
embedding_by_problem, delete_embedding_by_source
class ParagraphSerializer(serializers.ModelSerializer):
@ -115,6 +117,7 @@ class ParagraphSerializers(serializers.Serializer):
).one(with_valid=True)
class Operate(serializers.Serializer):
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
# 段落id
paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))
# 知识库id
@ -282,6 +285,100 @@ class ParagraphSerializers(serializers.Serializer):
else:
return Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id)
class Query(serializers.Serializer):
    # Filters and lists the paragraphs of one document. ``title`` and
    # ``content`` are optional case-insensitive substring filters.
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    title = serializers.CharField(required=False, label=_('section title'))
    content = serializers.CharField(required=False)

    def get_query_set(self):
        """Build the paragraph queryset for the validated filters.

        Must be called after ``is_valid()`` because it reads ``self.data``.

        Returns:
            A ``Paragraph`` queryset restricted to the given knowledge base
            and document, narrowed by ``title`` / ``content`` when supplied,
            ordered by ``-create_time`` and then ``id``.
        """
        filters = self.data
        query_set = QuerySet(model=Paragraph).filter(
            knowledge_id=filters.get('knowledge_id'),
            document_id=filters.get('document_id'),
        )
        if 'title' in filters:
            query_set = query_set.filter(title__icontains=filters.get('title'))
        if 'content' in filters:
            query_set = query_set.filter(content__icontains=filters.get('content'))
        # order_by() returns a new queryset rather than sorting in place; the
        # previous code discarded that result, so the ordering was never applied.
        return query_set.order_by('-create_time', 'id')

    def list(self):
        """Return every matching paragraph serialized with ``ParagraphSerializer``."""
        return [ParagraphSerializer(row).data for row in self.get_query_set()]

    def page(self, current_page, page_size):
        """Return one page of matching paragraphs via ``page_search``.

        Args:
            current_page: Page number, passed through to ``page_search``.
            page_size: Page size, passed through to ``page_search``.
        """
        query_set = self.get_query_set()
        return page_search(current_page, page_size, query_set, lambda row: ParagraphSerializer(row).data)
class Association(serializers.Serializer):
    # Creates or removes the mapping between a problem and a paragraph.
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    problem_id = serializers.UUIDField(required=True, label=_('problem id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))

    def is_valid(self, *, raise_exception=True):
        """Validate the fields, then check that the paragraph and problem exist.

        Field validation always raises on failure regardless of
        ``raise_exception`` (kept as in the original implementation).

        Returns:
            ``True`` when validation succeeds, matching the contract of
            ``Serializer.is_valid`` (the previous code returned ``None``).

        Raises:
            AppApiException: If the paragraph or the problem is not found in
                the given knowledge base.
        """
        super().is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        paragraph_id = self.data.get('paragraph_id')
        problem_id = self.data.get("problem_id")
        if not QuerySet(Paragraph).filter(knowledge_id=knowledge_id, id=paragraph_id).exists():
            raise AppApiException(500, _('Paragraph does not exist'))
        if not QuerySet(Problem).filter(knowledge_id=knowledge_id, id=problem_id).exists():
            raise AppApiException(500, _('Problem does not exist'))
        return True

    def association(self, with_valid=True, with_embedding=True):
        """Associate the problem with the paragraph.

        Saves a new ``ProblemParagraphMapping`` and, when ``with_embedding``
        is true, calls ``embedding_by_problem`` for the problem text.

        Args:
            with_valid: Run ``is_valid`` first.
            with_embedding: Also embed the problem for the new mapping.

        Raises:
            AppApiException: If validation fails or the problem cannot be loaded.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        problem = QuerySet(Problem).filter(id=self.data.get("problem_id")).first()
        if problem is None:
            # Guard against dereferencing None when the problem is missing
            # (e.g. validation was skipped by the caller).
            raise AppApiException(500, _('Problem does not exist'))
        problem_paragraph_mapping = ProblemParagraphMapping(id=uuid.uuid7(),
                                                            document_id=self.data.get('document_id'),
                                                            paragraph_id=self.data.get('paragraph_id'),
                                                            knowledge_id=self.data.get('knowledge_id'),
                                                            problem_id=problem.id)
        problem_paragraph_mapping.save()
        if with_embedding:
            model_id = get_embedding_model_id_by_knowledge_id(self.data.get('knowledge_id'))
            embedding_by_problem({
                'text': problem.content,
                'is_active': True,
                'source_type': SourceType.PROBLEM,
                'source_id': problem_paragraph_mapping.id,
                'document_id': self.data.get('document_id'),
                'paragraph_id': self.data.get('paragraph_id'),
                'knowledge_id': self.data.get('knowledge_id'),
            }, model_id)

    def un_association(self, with_valid=True):
        """Remove the association between the problem and the paragraph.

        Deletes the first matching ``ProblemParagraphMapping`` and then calls
        ``delete_embedding_by_source`` with that mapping's id.

        Args:
            with_valid: Run ``is_valid`` first.

        Returns:
            ``True`` on success.

        Raises:
            AppApiException: If validation fails or no mapping exists for the
                problem/paragraph pair.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
            paragraph_id=self.data.get('paragraph_id'),
            knowledge_id=self.data.get('knowledge_id'),
            problem_id=self.data.get('problem_id')).first()
        if problem_paragraph_mapping is None:
            # Previously this fell through to ``None.id`` and failed with an
            # AttributeError; report the condition explicitly instead.
            raise AppApiException(500, _('The problem is not associated with the paragraph'))
        # Keep the id: it is needed for the embedding cleanup after the row is deleted.
        problem_paragraph_mapping_id = problem_paragraph_mapping.id
        problem_paragraph_mapping.delete()
        delete_embedding_by_source(problem_paragraph_mapping_id)
        return True
class Batch(serializers.Serializer):
    # Batch operations on the paragraphs of one document.
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))

    @transaction.atomic
    def batch_delete(self, instance: Dict, with_valid=True):
        """Delete several paragraphs of this document in one transaction.

        Only paragraphs that belong to the serializer's ``knowledge_id`` and
        ``document_id`` are deleted. The previous code filtered on the
        client-supplied ``id_list`` alone, so paragraphs of any other
        document could be removed through this endpoint.

        Args:
            instance: Request payload; ``id_list`` holds the paragraph ids.
            with_valid: Validate ``instance`` with ``BatchSerializer`` and this
                serializer's own fields first.

        Returns:
            ``True`` (also when none of the ids belong to this document).
        """
        if with_valid:
            BatchSerializer(data=instance).is_valid(model=Paragraph, raise_exception=True)
            self.is_valid(raise_exception=True)
        document_id = self.data.get('document_id')
        scoped_paragraphs = QuerySet(Paragraph).filter(
            id__in=instance.get("id_list"),
            knowledge_id=self.data.get('knowledge_id'),
            document_id=document_id,
        )
        # Resolve the ids before deleting: the follow-up cleanup must act on
        # exactly the rows removed here. They are passed on as strings, the
        # same form the request payload used.
        paragraph_id_list = [str(pk) for pk in scoped_paragraphs.values_list('id', flat=True)]
        if not paragraph_id_list:
            # Nothing in this document matched, so there is nothing to clean up.
            return True
        scoped_paragraphs.delete()
        delete_problems_and_mappings(paragraph_id_list)
        update_document_char_length(document_id)
        # Remove the matching entries from the vector store.
        delete_embedding_by_paragraph_ids(paragraph_id_list)
        return True
def delete_problems_and_mappings(paragraph_ids):
problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids)

View File

@ -21,6 +21,12 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task/batch', views.DocumentView.BatchCancelTask.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch', views.ParagraphView.Batch.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>', views.ParagraphView.Operate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem', views.ParagraphView.Problem.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/association', views.ParagraphView.Association.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/unassociation', views.ParagraphView.UnAssociation.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<int:current_page>/<int:page_sige>', views.DocumentView.Page.as_view()),
path('workspace/<str:workspace_id>/knowledge/<int:current_page>/<int:page_size>', views.KnowledgeView.Page.as_view()),
]

View File

@ -1,2 +1,3 @@
from .document import *
from .knowledge import *
from .paragraph import *

View File

@ -0,0 +1,234 @@
from django.utils.translation import gettext_lazy as _
from drf_spectacular.utils import extend_schema
from rest_framework.views import APIView
from rest_framework.views import Request
from common.auth import TokenAuth
from common.auth.authentication import has_permissions
from common.constants.permission_constants import PermissionConstants
from common.result import result
from common.utils.common import query_params_to_single_dict
from knowledge.api.paragraph import ParagraphReadAPI, ParagraphCreateAPI, ParagraphBatchDeleteAPI, ParagraphEditAPI, \
ParagraphGetAPI, ProblemCreateAPI, UnAssociationAPI, AssociationAPI
from knowledge.serializers.paragraph import ParagraphSerializers
class ParagraphView(APIView):
    # Endpoints for the paragraphs of one document. Plain comments are used
    # instead of docstrings throughout this class: view docstrings can be
    # surfaced as endpoint descriptions by DRF / drf-spectacular, which would
    # change the generated API documentation.
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Paragraph list'),
        description=_('Paragraph list'),
        operation_id=_('Paragraph list'),
        parameters=ParagraphReadAPI.get_parameters(),
        responses=ParagraphReadAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        # Query-string filters go in first; the path identifiers are written
        # last so they win on any key clash.
        params = {
            **query_params_to_single_dict(request.query_params),
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'document_id': document_id
        }
        query_serializer = ParagraphSerializers.Query(data=params)
        query_serializer.is_valid(raise_exception=True)
        return result.success(query_serializer.list())

    @extend_schema(
        summary=_('Create Paragraph'),
        operation_id=_('Create Paragraph'),
        parameters=ParagraphCreateAPI.get_parameters(),
        request=ParagraphCreateAPI.get_request(),
        responses=ParagraphCreateAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_CREATE.get_workspace_permission())
    def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        path_data = {'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id}
        create_serializer = ParagraphSerializers.Create(data=path_data)
        return result.success(create_serializer.save(request.data))

    class Batch(APIView):
        authentication_classes = [TokenAuth]

        @extend_schema(
            methods=['DELETE'],
            summary=_('Batch Paragraph'),
            description=_('Batch Paragraph'),
            operation_id=_('Batch Paragraph'),
            parameters=ParagraphBatchDeleteAPI.get_parameters(),
            request=ParagraphBatchDeleteAPI.get_request(),
            responses=ParagraphBatchDeleteAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
            path_data = {'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id}
            batch_serializer = ParagraphSerializers.Batch(data=path_data)
            return result.success(batch_serializer.batch_delete(request.data))

    class Operate(APIView):
        authentication_classes = [TokenAuth]

        @extend_schema(
            methods=['PUT'],
            summary=_('Modify paragraph data'),
            description=_('Modify paragraph data'),
            operation_id=_('Modify paragraph data'),
            parameters=ParagraphEditAPI.get_parameters(),
            request=ParagraphEditAPI.get_request(),
            responses=ParagraphEditAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
            path_data = {
                'workspace_id': workspace_id,
                "paragraph_id": paragraph_id,
                'knowledge_id': knowledge_id,
                'document_id': document_id
            }
            operate_serializer = ParagraphSerializers.Operate(data=path_data)
            operate_serializer.is_valid(raise_exception=True)
            return result.success(operate_serializer.edit(request.data))

        @extend_schema(
            methods=['GET'],
            summary=_('Get paragraph details'),
            description=_('Get paragraph details'),
            operation_id=_('Get paragraph details'),
            parameters=ParagraphGetAPI.get_parameters(),
            responses=ParagraphGetAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        # NOTE(review): this read-only endpoint requires DOCUMENT_EDIT while the
        # paragraph list endpoint requires DOCUMENT_READ — confirm this is intended.
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
            path_data = {
                'workspace_id': workspace_id,
                "paragraph_id": paragraph_id,
                'knowledge_id': knowledge_id,
                'document_id': document_id
            }
            operate_serializer = ParagraphSerializers.Operate(data=path_data)
            operate_serializer.is_valid(raise_exception=True)
            return result.success(operate_serializer.one())

        @extend_schema(
            methods=['DELETE'],
            summary=_('Delete paragraph'),
            description=_('Delete paragraph'),
            operation_id=_('Delete paragraph'),
            parameters=ParagraphGetAPI.get_parameters(),
            responses=ParagraphGetAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')])
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
            path_data = {
                'workspace_id': workspace_id,
                "paragraph_id": paragraph_id,
                'knowledge_id': knowledge_id,
                'document_id': document_id
            }
            operate_serializer = ParagraphSerializers.Operate(data=path_data)
            operate_serializer.is_valid(raise_exception=True)
            return result.success(operate_serializer.delete())

    class Problem(APIView):
        authentication_classes = [TokenAuth]

        @extend_schema(
            methods=['POST'],
            summary=_('Add associated questions'),
            description=_('Add associated questions'),
            operation_id=_('Add associated questions'),
            parameters=ProblemCreateAPI.get_parameters(),
            request=ProblemCreateAPI.get_request(),
            responses=ProblemCreateAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
            path_data = {
                'workspace_id': workspace_id,
                "knowledge_id": knowledge_id,
                'document_id': document_id,
                'paragraph_id': paragraph_id
            }
            problem_serializer = ParagraphSerializers.Problem(data=path_data)
            return result.success(problem_serializer.save(request.data, with_valid=True))

        @extend_schema(
            methods=['GET'],
            summary=_('Get a list of paragraph questions'),
            description=_('Get a list of paragraph questions'),
            operation_id=_('Get a list of paragraph questions'),
            parameters=ParagraphGetAPI.get_parameters(),
            responses=ParagraphGetAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        # NOTE(review): this read-only endpoint requires DOCUMENT_EDIT while the
        # paragraph list endpoint requires DOCUMENT_READ — confirm this is intended.
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
            path_data = {
                'workspace_id': workspace_id,
                "knowledge_id": knowledge_id,
                'document_id': document_id,
                'paragraph_id': paragraph_id
            }
            problem_serializer = ParagraphSerializers.Problem(data=path_data)
            return result.success(problem_serializer.list(with_valid=True))

    class UnAssociation(APIView):
        authentication_classes = [TokenAuth]

        @extend_schema(
            methods=['PUT'],
            summary=_('Disassociation issue'),
            description=_('Disassociation issue'),
            operation_id=_('Disassociation issue'),
            parameters=UnAssociationAPI.get_parameters(),
            responses=UnAssociationAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def put(self, request: Request,
                workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str, problem_id: str):
            path_data = {
                'workspace_id': workspace_id,
                'knowledge_id': knowledge_id,
                'document_id': document_id,
                'paragraph_id': paragraph_id,
                'problem_id': problem_id
            }
            association_serializer = ParagraphSerializers.Association(data=path_data)
            return result.success(association_serializer.un_association())

    class Association(APIView):
        authentication_classes = [TokenAuth]

        @extend_schema(
            methods=['PUT'],
            summary=_('Related questions'),
            description=_('Related questions'),
            operation_id=_('Related questions'),
            parameters=AssociationAPI.get_parameters(),
            responses=AssociationAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def put(self, request: Request,
                workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str, problem_id: str):
            path_data = {
                'workspace_id': workspace_id,
                'knowledge_id': knowledge_id,
                'document_id': document_id,
                'paragraph_id': paragraph_id,
                'problem_id': problem_id
            }
            association_serializer = ParagraphSerializers.Association(data=path_data)
            return result.success(association_serializer.association())