From 2084112d54742ac3506f53163387edfbc5ec43c8 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:38:03 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Dweb=E7=AB=99=E7=82=B9?= =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93=E5=90=8C=E6=AD=A5=E7=8A=B6=E6=80=81?= =?UTF-8?q?=E9=94=99=E8=AF=AF=20(#1731)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataset/serializers/document_serializers.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index cf0f657eb..ac2006a52 100644 --- a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -432,6 +432,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.SYNC, State.PENDING) + ListenerManagement.get_aggregation_document_status(document_id)() source_url = document.meta.get('source_url') selector_list = document.meta.get('selector').split( " ") if 'selector' in document.meta and document.meta.get('selector') is not None else [] @@ -444,10 +445,10 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): # 删除向量库 delete_embedding_by_document(document_id) paragraphs = get_split_model('web.md').parse(result.content) - document.char_length = reduce(lambda x, y: x + y, - [len(p.get('content')) for p in paragraphs], - 0) - document.save() + char_length = reduce(lambda x, y: x + y, + [len(p.get('content')) for p in paragraphs], + 0) + QuerySet(Document).filter(id=document_id).update(char_length=char_length) document_paragraph_model = DocumentSerializers.Create.get_paragraph_model(document, paragraphs) paragraph_model_list = document_paragraph_model.get('paragraph_model_list') @@ -464,6 +465,13 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): # 向量化 if with_embedding: embedding_model_id = get_embedding_model_id_by_dataset_id(document.dataset_id) + ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), + TaskType.EMBEDDING, + State.PENDING) + ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id=document_id), + TaskType.EMBEDDING, + State.PENDING) + ListenerManagement.get_aggregation_document_status(document_id)() embedding_by_document.delay(document_id, embedding_model_id) else: @@ -477,6 +485,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id=document_id), TaskType.SYNC, state) + ListenerManagement.get_aggregation_document_status(document_id)() return True class Operate(ApiMixin, serializers.Serializer):