From cf1dac76cdc1bb69da64f67b4a3a851ba3726d21 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 Date: Fri, 19 Jan 2024 16:47:18 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=87=E6=A1=A3=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0meta=E5=85=83=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataset/serializers/common_serializers.py | 18 +++++++++++ .../serializers/dataset_serializers.py | 22 ++----------- .../serializers/document_serializers.py | 31 +++++++++++++++++-- apps/dataset/sql/list_document.sql | 1 + 4 files changed, 50 insertions(+), 22 deletions(-) diff --git a/apps/dataset/serializers/common_serializers.py b/apps/dataset/serializers/common_serializers.py index 1d8010136..651764356 100644 --- a/apps/dataset/serializers/common_serializers.py +++ b/apps/dataset/serializers/common_serializers.py @@ -18,6 +18,7 @@ from common.db.sql_execute import update_execute from common.exception.app_exception import AppApiException from common.mixins.api_mixin import ApiMixin from common.util.file_util import get_file_content +from common.util.fork import Fork from dataset.models import Paragraph from smartdoc.conf import PROJECT_DIR @@ -35,6 +36,23 @@ def list_paragraph(paragraph_list: List[str]): os.path.join(PROJECT_DIR, "apps", "dataset", 'sql', 'list_paragraph.sql'))) +class MetaSerializer(serializers.Serializer): + class WebMeta(serializers.Serializer): + source_url = serializers.CharField(required=True) + selector = serializers.CharField(required=False, allow_null=True, allow_blank=True) + + def is_valid(self, *, raise_exception=False): + super().is_valid(raise_exception=True) + source_url = self.data.get('source_url') + response = Fork(source_url, []).fork() + if response.status == 500: + raise AppApiException(500, response.message) + + class BaseMeta(serializers.Serializer): + def is_valid(self, *, raise_exception=False): + super().is_valid(raise_exception=True) + + class BatchSerializer(ApiMixin, serializers.Serializer): id_list = serializers.ListField(required=True, child=serializers.UUIDField(required=True)) diff --git a/apps/dataset/serializers/dataset_serializers.py b/apps/dataset/serializers/dataset_serializers.py index e917266d4..3bafe31a4 100644 --- a/apps/dataset/serializers/dataset_serializers.py +++ b/apps/dataset/serializers/dataset_serializers.py @@ -34,7 +34,7 @@ from common.util.file_util import get_file_content from common.util.fork import ChildLink, Fork from common.util.split_model import get_split_model from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type -from dataset.serializers.common_serializers import list_paragraph +from dataset.serializers.common_serializers import list_paragraph, MetaSerializer from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer from setting.models import AuthOperate from smartdoc.conf import PROJECT_DIR @@ -425,22 +425,6 @@ class DataSetSerializers(serializers.ModelSerializer): } ) - class MetaSerializer(serializers.Serializer): - class WebMeta(serializers.Serializer): - source_url = serializers.CharField(required=True) - selector = serializers.CharField(required=False, allow_null=True, allow_blank=True) - - def is_valid(self, *, raise_exception=False): - super().is_valid(raise_exception=True) - source_url = self.data.get('source_url') - response = Fork(source_url, []).fork() - if response.status == 500: - raise AppApiException(500, response.message) - - class BaseMeta(serializers.Serializer): - def is_valid(self, *, raise_exception=False): - super().is_valid(raise_exception=True) - class Edit(serializers.Serializer): name = serializers.CharField(required=False) desc = serializers.CharField(required=False) @@ -450,8 +434,8 @@ class DataSetSerializers(serializers.ModelSerializer): @staticmethod def get_dataset_meta_valid_map(): dataset_meta_valid_map = { - Type.base: DataSetSerializers.MetaSerializer.BaseMeta, - Type.web: DataSetSerializers.MetaSerializer.WebMeta + Type.base: MetaSerializer.BaseMeta, + Type.web: MetaSerializer.WebMeta } return dataset_meta_valid_map diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index 653c49166..dd32a7f4a 100644 --- a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -29,11 +29,32 @@ from common.util.file_util import get_file_content from common.util.fork import Fork from common.util.split_model import SplitModel, get_split_model from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, Status -from dataset.serializers.common_serializers import BatchSerializer +from dataset.serializers.common_serializers import BatchSerializer, MetaSerializer from dataset.serializers.paragraph_serializers import ParagraphSerializers, ParagraphInstanceSerializer from smartdoc.conf import PROJECT_DIR +class DocumentEditInstanceSerializer(ApiMixin, serializers.Serializer): + meta = serializers.DictField(required=False) + name = serializers.CharField(required=False) + is_active = serializers.BooleanField(required=False) + + @staticmethod + def get_meta_valid_map(): + dataset_meta_valid_map = { + Type.base: MetaSerializer.BaseMeta, + Type.web: MetaSerializer.WebMeta + } + return dataset_meta_valid_map + + def is_valid(self, *, document: Document = None): + super().is_valid(raise_exception=True) + if 'meta' in self.data and self.data.get('meta') is not None: + dataset_meta_valid_map = self.get_meta_valid_map() + valid_class = dataset_meta_valid_map.get(document.type) + valid_class(data=self.data.get('meta')).is_valid(raise_exception=True) + + class DocumentWebInstanceSerializer(ApiMixin, serializers.Serializer): source_url_list = serializers.ListField(required=True, child=serializers.CharField(required=True)) selector = serializers.CharField(required=False) @@ -212,9 +233,11 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): def edit(self, instance: Dict, with_valid=False): if with_valid: - self.is_valid() + self.is_valid(raise_exception=True) _document = QuerySet(Document).get(id=self.data.get("document_id")) - update_keys = ['name', 'is_active'] + if with_valid: + DocumentEditInstanceSerializer(data=instance).is_valid(document=_document) + update_keys = ['name', 'is_active', 'meta'] for update_key in update_keys: if update_key in instance and instance.get(update_key) is not None: _document.__setattr__(update_key, instance.get(update_key)) @@ -282,6 +305,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): properties={ 'name': openapi.Schema(type=openapi.TYPE_STRING, title="文档名称", description="文档名称"), 'is_active': openapi.Schema(type=openapi.TYPE_BOOLEAN, title="是否可用", description="是否可用"), + 'meta': openapi.Schema(type=openapi.TYPE_OBJECT, title="文档元数据", + description="文档元数据->web:{source_url:xxx,selector:'xxx'},base:{}"), } ) diff --git a/apps/dataset/sql/list_document.sql b/apps/dataset/sql/list_document.sql index 7d8d6968f..818d783c8 100644 --- a/apps/dataset/sql/list_document.sql +++ b/apps/dataset/sql/list_document.sql @@ -1,5 +1,6 @@ SELECT "document".* , + to_json("document"."meta") as meta, (SELECT "count"("id") FROM "paragraph" WHERE document_id="document"."id") as "paragraph_count" FROM "document" "document"