From 4dcd150c8dbc355b1d6b30822c5fc68d04d3fdbe Mon Sep 17 00:00:00 2001 From: CaptainB Date: Tue, 29 Apr 2025 13:07:22 +0800 Subject: [PATCH] refactor: improve code readability and formatting in common, knowledge, and paragraph modules --- apps/common/constants/permission_constants.py | 9 +- apps/common/db/search.py | 2 +- apps/knowledge/api/knowledge.py | 140 ++++++++++- ..._status_alter_paragraph_status_and_more.py | 38 +++ apps/knowledge/models/knowledge.py | 9 + apps/knowledge/serializers/common.py | 26 ++- apps/knowledge/serializers/document.py | 37 +-- apps/knowledge/serializers/knowledge.py | 219 ++++++++++++++++-- apps/knowledge/serializers/paragraph.py | 58 +++-- apps/knowledge/sql/list_knowledge.sql | 14 -- apps/knowledge/urls.py | 2 + apps/knowledge/views/knowledge.py | 87 ++++++- 12 files changed, 553 insertions(+), 88 deletions(-) create mode 100644 apps/knowledge/migrations/0003_alter_document_status_alter_paragraph_status_and_more.py diff --git a/apps/common/constants/permission_constants.py b/apps/common/constants/permission_constants.py index 92a1978e8..ce167e83d 100644 --- a/apps/common/constants/permission_constants.py +++ b/apps/common/constants/permission_constants.py @@ -211,12 +211,13 @@ class PermissionConstants(Enum): ) KNOWLEDGE_READ = Permission(group=Group.KNOWLEDGE, operate=Operate.READ, role_list=[RoleConstants.ADMIN, RoleConstants.USER], - resource_permission_group_list=[ - ResourcePermissionGroup.VIEW - ] - ) + resource_permission_group_list=[ResourcePermissionGroup.VIEW]) KNOWLEDGE_CREATE = Permission(group=Group.KNOWLEDGE, operate=Operate.CREATE, role_list=[RoleConstants.ADMIN, RoleConstants.USER]) + KNOWLEDGE_EDIT = Permission(group=Group.KNOWLEDGE, operate=Operate.EDIT, role_list=[RoleConstants.ADMIN, + RoleConstants.USER]) + KNOWLEDGE_DELETE = Permission(group=Group.KNOWLEDGE, operate=Operate.DELETE, role_list=[RoleConstants.ADMIN, + RoleConstants.USER]) def get_workspace_application_permission(self): return lambda r, kwargs: Permission(group=self.value.group, operate=self.value.operate, diff --git a/apps/common/db/search.py b/apps/common/db/search.py index 2955ba691..ec945b0b4 100644 --- a/apps/common/db/search.py +++ b/apps/common/db/search.py @@ -24,7 +24,7 @@ def get_dynamics_model(attr: dict, table_name='dynamics'): :return: django 模型 """ attributes = { - "__module__": "dataset.models", + "__module__": "knowledge.models", "Meta": type("Meta", (), {'db_table': table_name}), **attr } diff --git a/apps/knowledge/api/knowledge.py b/apps/knowledge/api/knowledge.py index 1caecfac8..554485c24 100644 --- a/apps/knowledge/api/knowledge.py +++ b/apps/knowledge/api/knowledge.py @@ -3,7 +3,8 @@ from drf_spectacular.utils import OpenApiParameter from common.mixins.api_mixin import APIMixin from common.result import ResultSerializer -from knowledge.serializers.knowledge import KnowledgeBaseCreateRequest, KnowledgeModelSerializer +from knowledge.serializers.knowledge import KnowledgeBaseCreateRequest, KnowledgeModelSerializer, KnowledgeEditRequest, \ + KnowledgeWebCreateRequest class KnowledgeCreateResponse(ResultSerializer): @@ -11,6 +12,31 @@ class KnowledgeCreateResponse(ResultSerializer): return KnowledgeModelSerializer() +class KnowledgeReadAPI(APIMixin): + @staticmethod + def get_parameters(): + return [ + OpenApiParameter( + name="workspace_id", + description="工作空间id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + OpenApiParameter( + name="knowledge_id", + description="知识库id", + type=OpenApiTypes.STR, + location='path', + required=True, + ) + ] + + @staticmethod + def get_response(): + return KnowledgeCreateResponse + + class KnowledgeBaseCreateAPI(APIMixin): @staticmethod def get_parameters(): @@ -48,14 +74,43 @@ class KnowledgeWebCreateAPI(APIMixin): @staticmethod def get_request(): - return KnowledgeBaseCreateRequest + return KnowledgeWebCreateRequest @staticmethod def get_response(): return KnowledgeCreateResponse -class KnowledgeTreeReadAPI(APIMixin): +class KnowledgeEditAPI(APIMixin): + @staticmethod + def get_parameters(): + return [ + OpenApiParameter( + name="workspace_id", + description="工作空间id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + OpenApiParameter( + name="knowledge_id", + description="知识库id", + type=OpenApiTypes.STR, + location='path', + required=True, + ) + ] + + @staticmethod + def get_request(): + return KnowledgeEditRequest + + @staticmethod + def get_response(): + return KnowledgeCreateResponse + + +class KnowledgeTreeReadAPI(KnowledgeReadAPI): @staticmethod def get_parameters(): return [ @@ -71,6 +126,83 @@ class KnowledgeTreeReadAPI(APIMixin): description="文件夹id", type=OpenApiTypes.STR, location='query', + required=True, + ), + OpenApiParameter( + name="user_id", + description="用户id", + type=OpenApiTypes.STR, + location='query', required=False, - ) + ), + OpenApiParameter( + name="name", + description="名称", + type=OpenApiTypes.STR, + location='query', + required=False, + ), + OpenApiParameter( + name="desc", + description="描述", + type=OpenApiTypes.STR, + location='query', + required=False, + ), + ] + + +class KnowledgePageAPI(KnowledgeReadAPI): + @staticmethod + def get_parameters(): + return [ + OpenApiParameter( + name="workspace_id", + description="工作空间id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + OpenApiParameter( + name="current_page", + description="当前页码", + type=OpenApiTypes.INT, + location='path', + required=True, + ), + OpenApiParameter( + name="page_size", + description="每页条数", + type=OpenApiTypes.INT, + location='path', + required=True, + ), + OpenApiParameter( + name="folder_id", + description="文件夹id", + type=OpenApiTypes.STR, + location='query', + required=True, + ), + OpenApiParameter( + name="user_id", + description="用户id", + type=OpenApiTypes.STR, + location='query', + required=False, + ), + OpenApiParameter( + name="name", + description="名称", + type=OpenApiTypes.STR, + location='query', + required=False, + ), + OpenApiParameter( + name="desc", + description="描述", + type=OpenApiTypes.STR, + location='query', + required=False, + ), ] diff --git a/apps/knowledge/migrations/0003_alter_document_status_alter_paragraph_status_and_more.py b/apps/knowledge/migrations/0003_alter_document_status_alter_paragraph_status_and_more.py new file mode 100644 index 000000000..76dfcdcdf --- /dev/null +++ b/apps/knowledge/migrations/0003_alter_document_status_alter_paragraph_status_and_more.py @@ -0,0 +1,38 @@ +# Generated by Django 5.2 on 2025-04-29 08:02 + +import django.db.models.deletion +import knowledge.models.knowledge +import uuid_utils.compat +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('knowledge', '0002_document_paragraph_embedding_problem_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='document', + name='status', + field=models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态'), + ), + migrations.AlterField( + model_name='paragraph', + name='status', + field=models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态'), + ), + migrations.CreateModel( + name='ApplicationKnowledgeMapping', + fields=[ + ('create_time', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')), + ('update_time', models.DateTimeField(auto_now=True, verbose_name='修改时间')), + ('id', models.UUIDField(default=uuid_utils.compat.uuid7, editable=False, primary_key=True, serialize=False, verbose_name='主键id')), + ('knowledge', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='knowledge.knowledge')), + ], + options={ + 'db_table': 'application_knowledge_mapping', + }, + ), + ] diff --git a/apps/knowledge/models/knowledge.py b/apps/knowledge/models/knowledge.py index 415bcc9fa..d00a655ff 100644 --- a/apps/knowledge/models/knowledge.py +++ b/apps/knowledge/models/knowledge.py @@ -197,6 +197,15 @@ class ProblemParagraphMapping(AppModelMixin): class Meta: db_table = "problem_paragraph_mapping" +class ApplicationKnowledgeMapping(AppModelMixin): + id = models.UUIDField(primary_key=True, max_length=128, default=uuid.uuid7, editable=False, verbose_name="主键id") + # application = models.ForeignKey(Application, on_delete=models.CASCADE) + knowledge = models.ForeignKey(Knowledge, on_delete=models.CASCADE) + + class Meta: + db_table = "application_knowledge_mapping" + + class SourceType(models.IntegerChoices): """订单类型""" diff --git a/apps/knowledge/serializers/common.py b/apps/knowledge/serializers/common.py index e31826437..571ab4101 100644 --- a/apps/knowledge/serializers/common.py +++ b/apps/knowledge/serializers/common.py @@ -160,18 +160,26 @@ class ProblemParagraphManage: content__in=problem_list).all() problem_content_dict = {} problem_model_list = [ - or_get(exists_problem_list, problemParagraphObject.problem_content, problemParagraphObject.knowledge_id, - problemParagraphObject.document_id, problemParagraphObject.paragraph_id, problem_content_dict) for - problemParagraphObject in self.problem_paragraph_object_list] + or_get( + exists_problem_list, + problemParagraphObject.problem_content, + problemParagraphObject.knowledge_id, + problemParagraphObject.document_id, + problemParagraphObject.paragraph_id, problem_content_dict + ) for problemParagraphObject in self.problem_paragraph_object_list] problem_paragraph_mapping_list = [ - ProblemParagraphMapping(id=uuid.uuid7(), document_id=document_id, problem_id=problem_model.id, - paragraph_id=paragraph_id, - knowledge_id=self.knowledge_id) for - problem_model, document_id, paragraph_id in problem_model_list] + ProblemParagraphMapping( + id=uuid.uuid7(), + document_id=document_id, + problem_id=problem_model.id, + paragraph_id=paragraph_id, + knowledge_id=self.knowledge_id + ) for problem_model, document_id, paragraph_id in problem_model_list] - result = [problem_model for problem_model, is_create in problem_content_dict.values() if - is_create], problem_paragraph_mapping_list + result = [ + problem_model for problem_model, is_create in problem_content_dict.values() if is_create + ], problem_paragraph_mapping_list return result diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py index 76ec4afe4..789f682d5 100644 --- a/apps/knowledge/serializers/document.py +++ b/apps/knowledge/serializers/document.py @@ -72,13 +72,17 @@ class DocumentSerializers(serializers.Serializer): if embedding_model.permission_type == 'PRIVATE' and knowledge_user_id != embedding_model.user_id: raise AppApiException(500, _('No permission to use this model') + f"{embedding_model.name}") document_id = self.data.get("document_id") - ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING, - State.PENDING) - ListenerManagement.update_status(QuerySet(Paragraph).annotate( - reversed_status=Reverse('status'), - task_type_status=Substr('reversed_status', TaskType.EMBEDDING.value, 1), - ).filter(task_type_status__in=state_list, document_id=document_id).values('id'), - TaskType.EMBEDDING, State.PENDING) + ListenerManagement.update_status( + QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING, State.PENDING + ) + ListenerManagement.update_status( + QuerySet(Paragraph).annotate( + reversed_status=Reverse('status'), + task_type_status=Substr('reversed_status', TaskType.EMBEDDING.value, 1), + ).filter(task_type_status__in=state_list, document_id=document_id).values('id'), + TaskType.EMBEDDING, + State.PENDING + ) ListenerManagement.get_aggregation_document_status(document_id)() try: @@ -122,8 +126,9 @@ class DocumentSerializers(serializers.Serializer): # 批量插入问题 QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None # 批量插入关联问题 - QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len( - problem_paragraph_mapping_list) > 0 else None + QuerySet(ProblemParagraphMapping).bulk_create( + problem_paragraph_mapping_list + ) if len(problem_paragraph_mapping_list) > 0 else None document_id = str(document_model.id) return (DocumentSerializers.Operate( data={'knowledge_id': knowledge_id, 'document_id': document_id} @@ -160,13 +165,15 @@ class DocumentSerializers(serializers.Serializer): 'knowledge_id': knowledge_id, 'id': uuid.uuid7(), 'name': instance.get('name'), - 'char_length': reduce(lambda x, y: x + y, - [len(p.get('content')) for p in instance.get('paragraphs', [])], - 0), + 'char_length': reduce( + lambda x, y: x + y, + [len(p.get('content')) for p in instance.get('paragraphs', [])], + 0), 'meta': instance.get('meta') if instance.get('meta') is not None else {}, 'type': instance.get('type') if instance.get('type') is not None else KnowledgeType.BASE }) - return DocumentSerializers.Create.get_paragraph_model(document_model, - instance.get('paragraphs') if - 'paragraphs' in instance else []) + return DocumentSerializers.Create.get_paragraph_model( + document_model, + instance.get('paragraphs') if 'paragraphs' in instance else [] + ) diff --git a/apps/knowledge/serializers/knowledge.py b/apps/knowledge/serializers/knowledge.py index ebceb5227..0e22a74a1 100644 --- a/apps/knowledge/serializers/knowledge.py +++ b/apps/knowledge/serializers/knowledge.py @@ -1,19 +1,23 @@ +import os from functools import reduce from typing import Dict import uuid_utils.compat as uuid -from django.db import transaction +from django.db import transaction, models from django.db.models import QuerySet from django.utils.translation import gettext_lazy as _ from rest_framework import serializers +from common.db.search import native_search, get_dynamics_model, native_page_search +from common.db.sql_execute import select_list from common.exception.app_exception import AppApiException -from common.utils.common import valid_license, post +from common.utils.common import valid_license, post, get_file_content from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \ - ProblemParagraphMapping -from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id + ProblemParagraphMapping, ApplicationKnowledgeMapping +from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer from knowledge.serializers.document import DocumentSerializers -from knowledge.task import sync_web_knowledge, embedding_by_knowledge +from knowledge.task import sync_web_knowledge, embedding_by_knowledge, delete_embedding_by_knowledge +from maxkb.conf import PROJECT_DIR class KnowledgeModelSerializer(serializers.ModelSerializer): @@ -38,14 +42,207 @@ class KnowledgeWebCreateRequest(serializers.Serializer): selector = serializers.CharField(required=True, label=_('knowledge selector')) +class KnowledgeEditRequest(serializers.Serializer): + name = serializers.CharField(required=False, max_length=64, min_length=1, label=_('knowledge name')) + desc = serializers.CharField(required=False, max_length=256, min_length=1, label=_('knowledge description')) + meta = serializers.DictField(required=False) + application_id_list = serializers.ListSerializer( + required=False, + child=serializers.UUIDField(required=True, label=_('application id')), + label=_('application id list') + ) + + @staticmethod + def get_knowledge_meta_valid_map(): + knowledge_meta_valid_map = { + KnowledgeType.BASE: MetaSerializer.BaseMeta, + KnowledgeType.WEB: MetaSerializer.WebMeta + } + return knowledge_meta_valid_map + + def is_valid(self, *, knowledge: Knowledge = None): + super().is_valid(raise_exception=True) + if 'meta' in self.data and self.data.get('meta') is not None: + knowledge_meta_valid_map = self.get_knowledge_meta_valid_map() + valid_class = knowledge_meta_valid_map.get(knowledge.type) + valid_class(data=self.data.get('meta')).is_valid(raise_exception=True) + + class KnowledgeSerializer(serializers.Serializer): + class Query(serializers.Serializer): + workspace_id = serializers.CharField(required=True) + folder_id = serializers.CharField(required=True) + name = serializers.CharField(required=False, label=_('knowledge name'), allow_null=True, allow_blank=True, + max_length=64, min_length=1) + desc = serializers.CharField(required=False, label=_('knowledge description'), allow_null=True, + allow_blank=True, max_length=256, min_length=1) + user_id = serializers.UUIDField(required=False, label=_('user id'), allow_null=True) + + def get_query_set(self): + workspace_id = self.data.get("workspace_id") + query_set_dict = {} + query_set = QuerySet(model=get_dynamics_model({ + 'temp.name': models.CharField(), + 'temp.desc': models.CharField(), + "document_temp.char_length": models.IntegerField(), + 'temp.create_time': models.DateTimeField(), + 'temp.user_id': models.CharField(), + 'temp.workspace_id': models.CharField(), + 'temp.folder_id': models.CharField(), + 'temp.id': models.CharField() + })) + if "desc" in self.data and self.data.get('desc') is not None: + query_set = query_set.filter(**{'temp.desc__icontains': self.data.get("desc")}) + if "name" in self.data and self.data.get('name') is not None: + query_set = query_set.filter(**{'temp.name__icontains': self.data.get("name")}) + if "user_id" in self.data and self.data.get('user_id') is not None: + query_set = query_set.filter(**{'temp.user_id': self.data.get("user_id")}) + if "workspace_id" in self.data and self.data.get('workspace_id') is not None: + query_set = query_set.filter(**{'temp.workspace_id': self.data.get("workspace_id")}) + if "folder_id" in self.data and self.data.get('folder_id') is not None: + query_set = query_set.filter(**{'temp.folder_id': self.data.get("folder_id")}) + query_set = query_set.order_by("-temp.create_time", "temp.id") + query_set_dict['default_sql'] = query_set + + query_set_dict['knowledge_custom_sql'] = QuerySet(model=get_dynamics_model({ + 'knowledge.workspace_id': models.CharField(), + })).filter(**{'knowledge.workspace_id': workspace_id}) + + return query_set_dict + + def page(self, current_page: int, page_size: int): + self.is_valid(raise_exception=True) + return native_page_search( + current_page, + page_size, + self.get_query_set(), + select_string=get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_knowledge.sql') + ), + post_records_handler=lambda r: r + ) + + def list(self): + self.is_valid(raise_exception=True) + return native_search( + self.get_query_set(), + select_string=get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_knowledge.sql') + ) + ) + + class Operate(serializers.Serializer): + user_id = serializers.UUIDField(required=True, label=_('user id')) + workspace_id = serializers.CharField(required=True, label=_('workspace id')) + knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) + + def list_application(self, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + knowledge = QuerySet(Knowledge).get(id=self.data.get("knowledge_id")) + return select_list( + get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_knowledge_application.sql') + ), + [ + self.data.get('user_id') if self.data.get('user_id') == str(knowledge.user_id) else None, + knowledge.user_id, + self.data.get('user_id') + ] + ) + + def one(self): + self.is_valid() + query_set_dict = { + 'default_sql': QuerySet( + model=get_dynamics_model({'temp.id': models.UUIDField()}) + ).filter(**{'temp.id': self.data.get("knowledge_id")}), + 'knowledge_custom_sql': QuerySet( + model=get_dynamics_model({'knowledge.user_id': models.CharField()}) + ).filter(**{'knowledge.user_id': self.data.get("user_id")}), + } + # todo 这里需要优化 + # all_application_list = [str(adm.get('id')) for adm in self.list_application(with_valid=False)] + all_application_list = [] + return { + **native_search(query_set_dict, select_string=get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_knowledge.sql')), with_search_one=True), + 'application_id_list': list(filter( + lambda application_id: all_application_list.__contains__(application_id), + [ + str( + application_knowledge_mapping.application_id + ) for application_knowledge_mapping in + QuerySet(ApplicationKnowledgeMapping).filter(knowledge_id=self.data.get('knowledge_id')) + ] + )) + } + + @transaction.atomic + def edit(self, instance: Dict): + self.is_valid() + if QuerySet(Knowledge).filter( + workspace_id=self.data.get('workspace_id'), + name=instance.get('name') + ).exclude(id=self.data.get('knowledge_id')).exists(): + raise AppApiException(500, _('Knowledge base name duplicate!')) + knowledge = QuerySet(Knowledge).get(id=self.data.get("knowledge_id")) + KnowledgeEditRequest(data=instance).is_valid(knowledge=knowledge) + if 'embedding_model_id' in instance: + knowledge.embedding_model_id = instance.get('embedding_model_id') + if "name" in instance: + knowledge.name = instance.get("name") + if 'desc' in instance: + knowledge.desc = instance.get("desc") + if 'meta' in instance: + knowledge.meta = instance.get('meta') + if 'application_id_list' in instance and instance.get('application_id_list') is not None: + application_id_list = instance.get('application_id_list') + # 当前用户可修改关联的知识库列表 + application_knowledge_id_list = [ + str(knowledge_dict.get('id')) for knowledge_dict in self.list_application(with_valid=False) + ] + for knowledge_id in application_id_list: + if not application_knowledge_id_list.__contains__(knowledge_id): + raise AppApiException( + 500, + _( + 'Unknown application id {knowledge_id}, cannot be associated' + ).format(knowledge_id=knowledge_id) + ) + + QuerySet(ApplicationKnowledgeMapping).filter( + application_id__in=application_knowledge_id_list, + knowledge_id=self.data.get("knowledge_id") + ).delete() + # 插入 + QuerySet(ApplicationKnowledgeMapping).bulk_create([ + ApplicationKnowledgeMapping( + application_id=application_id, knowledge_id=self.data.get('knowledge_id') + ) for application_id in application_id_list + ]) if len(application_id_list) > 0 else None + + knowledge.save() + return self.one() + + @transaction.atomic + def delete(self): + self.is_valid() + knowledge = QuerySet(Knowledge).get(id=self.data.get("knowledge_id")) + QuerySet(Document).filter(knowledge=knowledge).delete() + QuerySet(ProblemParagraphMapping).filter(knowledge=knowledge).delete() + QuerySet(Paragraph).filter(knowledge=knowledge).delete() + QuerySet(Problem).filter(knowledge=knowledge).delete() + knowledge.delete() + delete_embedding_by_knowledge(self.data.get('knowledge_id')) + return True + class Create(serializers.Serializer): user_id = serializers.UUIDField(required=True, label=_('user id')) workspace_id = serializers.CharField(required=True, label=_('workspace id')) @staticmethod def post_embedding_knowledge(document_list, knowledge_id): - # todo 发送向量化事件 model_id = get_embedding_model_id_by_knowledge_id(knowledge_id) embedding_by_knowledge.delay(knowledge_id, model_id) return document_list @@ -102,8 +299,9 @@ class KnowledgeSerializer(serializers.Serializer): # 批量插入问题 QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None # 批量插入关联问题 - QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len( - problem_paragraph_mapping_list) > 0 else None + QuerySet(ProblemParagraphMapping).bulk_create( + problem_paragraph_mapping_list + ) if len(problem_paragraph_mapping_list) > 0 else None return { **KnowledgeModelSerializer(knowledge).data, @@ -141,8 +339,3 @@ class KnowledgeSerializer(serializers.Serializer): knowledge.save() sync_web_knowledge.delay(str(knowledge_id), instance.get('source_url'), instance.get('selector')) return {**KnowledgeModelSerializer(knowledge).data, 'document_list': []} - - -class KnowledgeTreeSerializer(serializers.Serializer): - def get_knowledge_list(self, param): - pass diff --git a/apps/knowledge/serializers/paragraph.py b/apps/knowledge/serializers/paragraph.py index a297e07dd..80291d458 100644 --- a/apps/knowledge/serializers/paragraph.py +++ b/apps/knowledge/serializers/paragraph.py @@ -13,7 +13,7 @@ from common.utils.common import post from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \ get_embedding_model_id_by_knowledge_id, update_document_char_length -from knowledge.serializers.problem import ProblemInstanceSerializer +from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer from knowledge.task import embedding_by_paragraph, enable_embedding_by_paragraph, disable_embedding_by_paragraph, \ delete_embedding_by_paragraph @@ -53,7 +53,7 @@ class ParagraphSerializers(serializers.Serializer): # 段落id paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id')) # 知识库id - dataset_id = serializers.UUIDField(required=True, label=_('dataset id')) + knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) # 文档id document_id = serializers.UUIDField(required=True, label=_('document id')) @@ -105,20 +105,27 @@ class ParagraphSerializers(serializers.Serializer): QuerySet(Problem).filter(id__in=[row.id for row in delete_problem_list]).delete() if len( delete_problem_list) > 0 else None # 插入新的问题 - QuerySet(Problem).bulk_create( - [Problem(id=uuid.uuid1(), content=p.get('content'), paragraph_id=self.data.get('paragraph_id'), - dataset_id=self.data.get('dataset_id'), document_id=self.data.get('document_id')) for - p in create_problem_list]) if len(create_problem_list) else None + QuerySet(Problem).bulk_create([ + Problem( + id=uuid.uuid7(), + content=p.get('content'), + paragraph_id=self.data.get('paragraph_id'), + knowledge_id=self.data.get('knowledge_id'), + document_id=self.data.get('document_id') + ) for p in create_problem_list + ]) if len(create_problem_list) else None # 修改问题集合 - QuerySet(Problem).bulk_update( - [Problem(id=row.get('id'), content=row.get('content')) for row in update_problem_list], - ['content']) if len( - update_problem_list) > 0 else None + QuerySet(Problem).bulk_update([ + Problem( + id=row.get('id'), + content=row.get('content') + ) for row in update_problem_list], ['content'] + ) if len(update_problem_list) > 0 else None _paragraph.save() update_document_char_length(self.data.get('document_id')) - return self.one(), instance, self.data.get('dataset_id') + return self.one(), instance, self.data.get('knowledge_id') def get_problem_list(self): ProblemParagraphMapping(ProblemParagraphMapping) @@ -172,8 +179,9 @@ class ParagraphSerializers(serializers.Serializer): # 插入問題 QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None # 插入问题关联关系 - QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len( - problem_paragraph_mapping_list) > 0 else None + QuerySet(ProblemParagraphMapping).bulk_create( + problem_paragraph_mapping_list + ) if len(problem_paragraph_mapping_list) > 0 else None # 修改长度 update_document_char_length(document_id) if with_embedding: @@ -185,17 +193,21 @@ class ParagraphSerializers(serializers.Serializer): @staticmethod def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: Dict): - paragraph = Paragraph(id=uuid.uuid7(), - document_id=document_id, - content=instance.get("content"), - knowledge_id=knowledge_id, - title=instance.get("title") if 'title' in instance else '') - problem_paragraph_object_list = [ - ProblemParagraphObject(knowledge_id, document_id, paragraph.id, problem.get('content')) for problem in - (instance.get('problem_list') if 'problem_list' in instance else [])] + paragraph = Paragraph( + id=uuid.uuid7(), + document_id=document_id, + content=instance.get("content"), + knowledge_id=knowledge_id, + title=instance.get("title") if 'title' in instance else '' + ) + problem_paragraph_object_list = [ProblemParagraphObject( + knowledge_id, document_id, str(paragraph.id), problem.get('content') + ) for problem in (instance.get('problem_list') if 'problem_list' in instance else [])] - return {'paragraph': paragraph, - 'problem_paragraph_object_list': problem_paragraph_object_list} + return { + 'paragraph': paragraph, + 'problem_paragraph_object_list': problem_paragraph_object_list + } @staticmethod def or_get(exists_problem_list, content, knowledge_id): diff --git a/apps/knowledge/sql/list_knowledge.sql b/apps/knowledge/sql/list_knowledge.sql index 61b48eab1..d30b81849 100644 --- a/apps/knowledge/sql/list_knowledge.sql +++ b/apps/knowledge/sql/list_knowledge.sql @@ -14,20 +14,6 @@ FROM FROM knowledge knowledge ${knowledge_custom_sql} - UNION - SELECT - * - FROM - knowledge - WHERE - knowledge."id" IN ( - SELECT - team_member_permission.target - FROM - team_member team_member - LEFT JOIN team_member_permission team_member_permission ON team_member_permission.member_id = team_member."id" - ${team_member_permission_custom_sql} - ) ) temp_knowledge LEFT JOIN ( SELECT "count" ( "id" ) AS document_count, "sum" ( "char_length" ) "char_length", knowledge_id FROM "document" GROUP BY knowledge_id ) "document_temp" ON temp_knowledge."id" = "document_temp".knowledge_id LEFT JOIN (SELECT "count"("id"),knowledge_id FROM application_knowledge_mapping GROUP BY knowledge_id) app_knowledge_temp ON temp_knowledge."id" = "app_knowledge_temp".knowledge_id diff --git a/apps/knowledge/urls.py b/apps/knowledge/urls.py index 081209f18..3715e11f7 100644 --- a/apps/knowledge/urls.py +++ b/apps/knowledge/urls.py @@ -7,4 +7,6 @@ urlpatterns = [ path('workspace//knowledge', views.KnowledgeView.as_view()), path('workspace//knowledge/base', views.KnowledgeBaseView.as_view()), path('workspace//knowledge/web', views.KnowledgeWebView.as_view()), + path('workspace//knowledge/', views.KnowledgeView.Operate.as_view()), + path('workspace//knowledge//', views.KnowledgeView.Page.as_view()), ] diff --git a/apps/knowledge/views/knowledge.py b/apps/knowledge/views/knowledge.py index 2ea2e312d..e97891888 100644 --- a/apps/knowledge/views/knowledge.py +++ b/apps/knowledge/views/knowledge.py @@ -7,8 +7,9 @@ from common.auth import TokenAuth from common.auth.authentication import has_permissions from common.constants.permission_constants import PermissionConstants from common.result import result -from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI -from knowledge.serializers.knowledge import KnowledgeSerializer, KnowledgeTreeSerializer +from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI, \ + KnowledgeEditAPI, KnowledgeReadAPI, KnowledgePageAPI +from knowledge.serializers.knowledge import KnowledgeSerializer class KnowledgeView(APIView): @@ -24,9 +25,85 @@ class KnowledgeView(APIView): ) @has_permissions(PermissionConstants.KNOWLEDGE_READ.get_workspace_permission()) def get(self, request: Request, workspace_id: str): - return result.success(KnowledgeTreeSerializer( - data={'workspace_id': workspace_id} - ).get_knowledge_list(request.query_params.get('folder_id'))) + return result.success(KnowledgeSerializer.Query( + data={ + 'workspace_id': workspace_id, + 'folder_id': request.query_params.get('folder_id'), + 'name': request.query_params.get('name'), + 'desc': request.query_params.get("desc"), + 'user_id': request.query_params.get('user_id') + } + ).list()) + + class Operate(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + methods=['PUT'], + description=_('Edit knowledge'), + operation_id=_('Edit knowledge'), + parameters=KnowledgeEditAPI.get_parameters(), + request=KnowledgeEditAPI.get_request(), + responses=KnowledgeEditAPI.get_response(), + tags=[_('Knowledge Base')] + ) + @has_permissions(PermissionConstants.KNOWLEDGE_EDIT.get_workspace_permission()) + def put(self, request: Request, workspace_id: str, knowledge_id: str): + return result.success(KnowledgeSerializer.Operate( + data={'user_id': request.user.id, 'workspace_id': workspace_id, 'knowledge_id': knowledge_id} + ).edit(request.data)) + + @extend_schema( + methods=['DELETE'], + description=_('Delete knowledge'), + operation_id=_('Delete knowledge'), + parameters=KnowledgeBaseCreateAPI.get_parameters(), + request=KnowledgeBaseCreateAPI.get_request(), + responses=KnowledgeBaseCreateAPI.get_response(), + tags=[_('Knowledge Base')] + ) + @has_permissions(PermissionConstants.KNOWLEDGE_DELETE.get_workspace_permission()) + def delete(self, request: Request, workspace_id: str, knowledge_id: str): + return result.success(KnowledgeSerializer.Operate( + data={'user_id': request.user.id, 'workspace_id': workspace_id, 'knowledge_id': knowledge_id} + ).delete()) + + @extend_schema( + methods=['GET'], + description=_('Get knowledge'), + operation_id=_('Get knowledge'), + parameters=KnowledgeReadAPI.get_parameters(), + responses=KnowledgeReadAPI.get_response(), + tags=[_('Knowledge Base')] + ) + @has_permissions(PermissionConstants.KNOWLEDGE_DELETE.get_workspace_permission()) + def get(self, request: Request, workspace_id: str, knowledge_id: str): + return result.success(KnowledgeSerializer.Operate( + data={'user_id': request.user.id, 'workspace_id': workspace_id, 'knowledge_id': knowledge_id} + ).one()) + + class Page(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + methods=['GET'], + description=_('Get the knowledge base paginated list'), + operation_id=_('Get the knowledge base paginated list'), + parameters=KnowledgePageAPI.get_parameters(), + responses=KnowledgePageAPI.get_response(), + tags=[_('Knowledge Base')] + ) + @has_permissions(PermissionConstants.KNOWLEDGE_READ.get_workspace_permission()) + def get(self, request: Request, workspace_id: str, current_page: int, page_size: int): + return result.success(KnowledgeSerializer.Query( + data={ + 'workspace_id': workspace_id, + 'folder_id': request.query_params.get('folder_id'), + 'name': request.query_params.get('name'), + 'desc': request.query_params.get("desc"), + 'user_id': request.query_params.get('user_id') + } + ).page(current_page, page_size)) class KnowledgeBaseView(APIView):