feat: implement Paragraph API for CRUD operations and batch deletion

2025-12-26 01:33:05 +00:00 · 2025-05-07 12:02:51 +08:00 · 2025-05-07 12:02:51 +08:00 · 10105ce5ab
parent bcc7c1acf1
commit 10105ce5ab
5 changed files with 548 additions and 2 deletions
--- a/apps/knowledge/api/paragraph.py
+++ b/apps/knowledge/api/paragraph.py
@ -0,0 +1,208 @@
+from drf_spectacular.types import OpenApiTypes
+from drf_spectacular.utils import OpenApiParameter
+
+from common.mixins.api_mixin import APIMixin
+from common.result import DefaultResultSerializer, ResultSerializer
+from knowledge.serializers.common import BatchSerializer
+from knowledge.serializers.paragraph import ParagraphSerializer
+from knowledge.serializers.problem import ProblemSerializer
+
+
+class ParagraphReadResponse(ResultSerializer):
+    """Response schema for endpoints that return a list of paragraphs."""
+
+    @staticmethod
+    def get_data():
+        # many=True declares the payload as a list of ParagraphSerializer items.
+        # NOTE(review): presumably ResultSerializer uses this for its `data` field - confirm in common.result.
+        return ParagraphSerializer(many=True)
+
+
+class ParagraphReadAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="document_id",
+                description="文档id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="title",
+                description="标题",
+                type=OpenApiTypes.STR,
+                location='query',
+                required=False,
+            ),
+            OpenApiParameter(
+                name="content",
+                description="内容",
+                type=OpenApiTypes.STR,
+                location='query',
+                required=False,
+            ),
+        ]
+
+    @staticmethod
+    def get_response():
+        return ParagraphReadResponse
+
+
+class ParagraphCreateAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="document_id",
+                description="文档id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+        ]
+
+    @staticmethod
+    def get_request():
+        return ParagraphSerializer
+
+    @staticmethod
+    def get_response():
+        return ParagraphReadResponse
+
+
+class ParagraphBatchDeleteAPI(ParagraphCreateAPI):
+    """OpenAPI description of batch paragraph deletion.
+
+    Path parameters are inherited from ParagraphCreateAPI; only the request
+    and response schemas are overridden here.
+    """
+
+    @staticmethod
+    def get_request():
+        # Request body schema for the batch operation.
+        return BatchSerializer
+
+    @staticmethod
+    def get_response():
+        return DefaultResultSerializer
+
+
+class ParagraphGetAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="document_id",
+                description="文档id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="paragraph_id",
+                description="段落id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+        ]
+
+
+class ParagraphEditAPI(ParagraphGetAPI):
+    """OpenAPI description of paragraph modification; path parameters come from ParagraphGetAPI."""
+
+    @staticmethod
+    def get_request():
+        # Request body schema for the edit operation.
+        return ParagraphSerializer
+
+    @staticmethod
+    def get_response():
+        return DefaultResultSerializer
+
+
+class ProblemCreateAPI(ParagraphGetAPI):
+    """OpenAPI description of adding a problem to a paragraph; path parameters come from ParagraphGetAPI."""
+
+    @staticmethod
+    def get_request():
+        # Request body schema for the new problem.
+        return ProblemSerializer
+
+    @staticmethod
+    def get_response():
+        return DefaultResultSerializer
+
+
+class UnAssociationAPI(APIMixin):
+    @staticmethod
+    def get_parameters():
+        return [
+            OpenApiParameter(
+                name="workspace_id",
+                description="工作空间id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="knowledge_id",
+                description="知识库id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="document_id",
+                description="文档id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="paragraph_id",
+                description="段落id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            ),
+            OpenApiParameter(
+                name="problem_id",
+                description="问题id",
+                type=OpenApiTypes.STR,
+                location='path',
+                required=True,
+            )
+        ]
+
+
+class AssociationAPI(UnAssociationAPI):
+    """OpenAPI description of the association endpoint; reuses UnAssociationAPI unchanged."""
+    pass
--- a/apps/knowledge/serializers/paragraph.py
+++ b/apps/knowledge/serializers/paragraph.py
@ -8,15 +8,17 @@ from django.db.models import QuerySet, Count
 from django.utils.translation import gettext_lazy as _
 from rest_framework import serializers

+from common.db.search import page_search
 from common.exception.app_exception import AppApiException
 from common.utils.common import post
 from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType
 from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \
-    get_embedding_model_id_by_knowledge_id, update_document_char_length
+    get_embedding_model_id_by_knowledge_id, update_document_char_length, BatchSerializer
 from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers
 from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \
    disable_embedding_by_paragraph, \
-    delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task
+    delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task, delete_embedding_by_paragraph_ids, \
+    embedding_by_problem, delete_embedding_by_source


 class ParagraphSerializer(serializers.ModelSerializer):
@ -115,6 +117,7 @@ class ParagraphSerializers(serializers.Serializer):
            ).one(with_valid=True)

    class Operate(serializers.Serializer):
+        workspace_id = serializers.CharField(required=True, label=_('workspace id'))
        # 段落id
        paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))
        # 知识库id
@ -282,6 +285,100 @@ class ParagraphSerializers(serializers.Serializer):
            else:
                return Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id)

+    class Query(serializers.Serializer):
+        knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
+        document_id = serializers.UUIDField(required=True, label=_('document id'))
+        title = serializers.CharField(required=False, label=_('section title'))
+        content = serializers.CharField(required=False)
+
+        def get_query_set(self):
+            query_set = QuerySet(model=Paragraph)
+            query_set = query_set.filter(
+                **{'knowledge_id': self.data.get('knowledge_id'), 'document_id': self.data.get("document_id")})
+            if 'title' in self.data:
+                query_set = query_set.filter(
+                    **{'title__icontains': self.data.get('title')})
+            if 'content' in self.data:
+                query_set = query_set.filter(**{'content__icontains': self.data.get('content')})
+            query_set.order_by('-create_time', 'id')
+            return query_set
+
+        def list(self):
+            return list(map(lambda row: ParagraphSerializer(row).data, self.get_query_set()))
+
+        def page(self, current_page, page_size):
+            query_set = self.get_query_set()
+            return page_search(current_page, page_size, query_set, lambda row: ParagraphSerializer(row).data)
+
+    class Association(serializers.Serializer):
+        workspace_id = serializers.CharField(required=True, label=_('workspace id'))
+        knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
+        problem_id = serializers.UUIDField(required=True, label=_('problem id'))
+        document_id = serializers.UUIDField(required=True, label=_('document id'))
+        paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))
+
+        def is_valid(self, *, raise_exception=True):
+            super().is_valid(raise_exception=True)
+            knowledge_id = self.data.get('knowledge_id')
+            paragraph_id = self.data.get('paragraph_id')
+            problem_id = self.data.get("problem_id")
+            if not QuerySet(Paragraph).filter(knowledge_id=knowledge_id, id=paragraph_id).exists():
+                raise AppApiException(500, _('Paragraph does not exist'))
+            if not QuerySet(Problem).filter(knowledge_id=knowledge_id, id=problem_id).exists():
+                raise AppApiException(500, _('Problem does not exist'))
+
+        def association(self, with_valid=True, with_embedding=True):
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            problem = QuerySet(Problem).filter(id=self.data.get("problem_id")).first()
+            problem_paragraph_mapping = ProblemParagraphMapping(id=uuid.uuid7(),
+                                                                document_id=self.data.get('document_id'),
+                                                                paragraph_id=self.data.get('paragraph_id'),
+                                                                knowledge_id=self.data.get('knowledge_id'),
+                                                                problem_id=problem.id)
+            problem_paragraph_mapping.save()
+            if with_embedding:
+                model_id = get_embedding_model_id_by_knowledge_id(self.data.get('knowledge_id'))
+                embedding_by_problem({
+                    'text': problem.content,
+                    'is_active': True,
+                    'source_type': SourceType.PROBLEM,
+                    'source_id': problem_paragraph_mapping.id,
+                    'document_id': self.data.get('document_id'),
+                    'paragraph_id': self.data.get('paragraph_id'),
+                    'knowledge_id': self.data.get('knowledge_id'),
+                }, model_id)
+
+        def un_association(self, with_valid=True):
+            if with_valid:
+                self.is_valid(raise_exception=True)
+            problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
+                paragraph_id=self.data.get('paragraph_id'),
+                knowledge_id=self.data.get('knowledge_id'),
+                problem_id=self.data.get(
+                    'problem_id')).first()
+            problem_paragraph_mapping_id = problem_paragraph_mapping.id
+            problem_paragraph_mapping.delete()
+            delete_embedding_by_source(problem_paragraph_mapping_id)
+            return True
+
+    class Batch(serializers.Serializer):
+        knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
+        document_id = serializers.UUIDField(required=True, label=_('document id'))
+
+        @transaction.atomic
+        def batch_delete(self, instance: Dict, with_valid=True):
+            if with_valid:
+                BatchSerializer(data=instance).is_valid(model=Paragraph, raise_exception=True)
+                self.is_valid(raise_exception=True)
+            paragraph_id_list = instance.get("id_list")
+            QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete()
+            delete_problems_and_mappings(paragraph_id_list)
+            update_document_char_length(self.data.get('document_id'))
+            # 删除向量库
+            delete_embedding_by_paragraph_ids(paragraph_id_list)
+            return True
+

 def delete_problems_and_mappings(paragraph_ids):
    problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids)
--- a/apps/knowledge/urls.py
+++ b/apps/knowledge/urls.py
@ -21,6 +21,12 @@ urlpatterns = [
    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task/batch', views.DocumentView.BatchCancelTask.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch', views.ParagraphView.Batch.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>', views.ParagraphView.Operate.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem', views.ParagraphView.Problem.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/association', views.ParagraphView.Association.as_view()),
+    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/unassociation', views.ParagraphView.UnAssociation.as_view()),
    path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<int:current_page>/<int:page_sige>', views.DocumentView.Page.as_view()),
    path('workspace/<str:workspace_id>/knowledge/<int:current_page>/<int:page_size>', views.KnowledgeView.Page.as_view()),
 ]
--- a/apps/knowledge/views/init.py
+++ b/apps/knowledge/views/init.py
@ -1,2 +1,3 @@
 from .document import *
 from .knowledge import *
+from .paragraph import *
--- a/apps/knowledge/views/paragraph.py
+++ b/apps/knowledge/views/paragraph.py
@ -0,0 +1,234 @@
+from django.utils.translation import gettext_lazy as _
+from drf_spectacular.utils import extend_schema
+from rest_framework.views import APIView
+from rest_framework.views import Request
+
+from common.auth import TokenAuth
+from common.auth.authentication import has_permissions
+from common.constants.permission_constants import PermissionConstants
+from common.result import result
+from common.utils.common import query_params_to_single_dict
+from knowledge.api.paragraph import ParagraphReadAPI, ParagraphCreateAPI, ParagraphBatchDeleteAPI, ParagraphEditAPI, \
+    ParagraphGetAPI, ProblemCreateAPI, UnAssociationAPI, AssociationAPI
+from knowledge.serializers.paragraph import ParagraphSerializers
+
+
+class ParagraphView(APIView):
+    authentication_classes = [TokenAuth]
+
+    @extend_schema(
+        summary=_('Paragraph list'),
+        description=_('Paragraph list'),
+        operation_id=_('Paragraph list'),
+        parameters=ParagraphReadAPI.get_parameters(),
+        responses=ParagraphReadAPI.get_response(),
+        tags=[_('Knowledge Base/Documentation/Paragraph')]
+    )
+    @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
+    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
+        q = ParagraphSerializers.Query(
+            data={
+                **query_params_to_single_dict(request.query_params),
+                'workspace_id': workspace_id,
+                'knowledge_id': knowledge_id,
+                'document_id': document_id
+            }
+        )
+        q.is_valid(raise_exception=True)
+        return result.success(q.list())
+
+    @extend_schema(
+        summary=_('Create Paragraph'),
+        operation_id=_('Create Paragraph'),
+        parameters=ParagraphCreateAPI.get_parameters(),
+        request=ParagraphCreateAPI.get_request(),
+        responses=ParagraphCreateAPI.get_response(),
+        tags=[_('Knowledge Base/Documentation/Paragraph')]
+    )
+    @has_permissions(PermissionConstants.DOCUMENT_CREATE.get_workspace_permission())
+    def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
+        return result.success(ParagraphSerializers.Create(
+            data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id}
+        ).save(request.data))
+
+    class Batch(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=['DELETE'],
+            summary=_('Batch Paragraph'),
+            description=_('Batch Paragraph'),
+            operation_id=_('Batch Paragraph'),
+            parameters=ParagraphBatchDeleteAPI.get_parameters(),
+            request=ParagraphBatchDeleteAPI.get_request(),
+            responses=ParagraphBatchDeleteAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
+            return result.success(ParagraphSerializers.Batch(
+                data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id}
+            ).batch_delete(request.data))
+
+    class Operate(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=['PUT'],
+            summary=_('Modify paragraph data'),
+            description=_('Modify paragraph data'),
+            operation_id=_('Modify paragraph data'),
+            parameters=ParagraphEditAPI.get_parameters(),
+            request=ParagraphEditAPI.get_request(),
+            responses=ParagraphEditAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
+            o = ParagraphSerializers.Operate(
+                data={
+                    'workspace_id': workspace_id,
+                    "paragraph_id": paragraph_id,
+                    'knowledge_id': knowledge_id,
+                    'document_id': document_id
+                }
+            )
+            o.is_valid(raise_exception=True)
+            return result.success(o.edit(request.data))
+
+        @extend_schema(
+            methods=['GET'],
+            summary=_('Get paragraph details'),
+            description=_('Get paragraph details'),
+            operation_id=_('Get paragraph details'),
+            parameters=ParagraphGetAPI.get_parameters(),
+            responses=ParagraphGetAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
+            o = ParagraphSerializers.Operate(
+                data={
+                    'workspace_id': workspace_id,
+                    "paragraph_id": paragraph_id,
+                    'knowledge_id': knowledge_id,
+                    'document_id': document_id
+                }
+            )
+            o.is_valid(raise_exception=True)
+            return result.success(o.one())
+
+        @extend_schema(
+            methods=['DELETE'],
+            summary=_('Delete paragraph'),
+            description=_('Delete paragraph'),
+            operation_id=_('Delete paragraph'),
+            parameters=ParagraphGetAPI.get_parameters(),
+            responses=ParagraphGetAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')])
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
+            o = ParagraphSerializers.Operate(
+                data={
+                    'workspace_id': workspace_id,
+                    "paragraph_id": paragraph_id,
+                    'knowledge_id': knowledge_id,
+                    'document_id': document_id
+                }
+            )
+            o.is_valid(raise_exception=True)
+            return result.success(o.delete())
+
+    class Problem(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=['POST'],
+            summary=_('Add associated questions'),
+            description=_('Add associated questions'),
+            operation_id=_('Add associated questions'),
+            parameters=ProblemCreateAPI.get_parameters(),
+            request=ProblemCreateAPI.get_request(),
+            responses=ProblemCreateAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
+            return result.success(ParagraphSerializers.Problem(
+                data={
+                    'workspace_id': workspace_id,
+                    "knowledge_id": knowledge_id,
+                    'document_id': document_id,
+                    'paragraph_id': paragraph_id
+                }
+            ).save(request.data, with_valid=True))
+
+        @extend_schema(
+            methods=['GET'],
+            summary=_('Get a list of paragraph questions'),
+            description=_('Get a list of paragraph questions'),
+            operation_id=_('Get a list of paragraph questions'),
+            parameters=ParagraphGetAPI.get_parameters(),
+            responses=ParagraphGetAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
+            return result.success(ParagraphSerializers.Problem(
+                data={
+                    'workspace_id': workspace_id,
+                    "knowledge_id": knowledge_id,
+                    'document_id': document_id,
+                    'paragraph_id': paragraph_id
+                }
+            ).list(with_valid=True))
+
+    class UnAssociation(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=['PUT'],
+            summary=_('Disassociation issue'),
+            description=_('Disassociation issue'),
+            operation_id=_('Disassociation issue'),
+            parameters=UnAssociationAPI.get_parameters(),
+            responses=UnAssociationAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def put(self, request: Request,
+                workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str, problem_id: str):
+            return result.success(ParagraphSerializers.Association(
+                data={
+                    'workspace_id': workspace_id,
+                    'knowledge_id': knowledge_id,
+                    'document_id': document_id,
+                    'paragraph_id': paragraph_id,
+                    'problem_id': problem_id
+                }
+            ).un_association())
+
+    class Association(APIView):
+        authentication_classes = [TokenAuth]
+
+        @extend_schema(
+            methods=['PUT'],
+            summary=_('Related questions'),
+            description=_('Related questions'),
+            operation_id=_('Related questions'),
+            parameters=AssociationAPI.get_parameters(),
+            responses=AssociationAPI.get_response(),
+            tags=[_('Knowledge Base/Documentation/Paragraph')]
+        )
+        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
+        def put(self, request: Request,
+                workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str, problem_id: str):
+            return result.success(ParagraphSerializers.Association(
+                data={
+                    'workspace_id': workspace_id,
+                    'knowledge_id': knowledge_id,
+                    'document_id': document_id,
+                    'paragraph_id': paragraph_id,
+                    'problem_id': problem_id
+                }
+            ).association())