From e5ead8ea203844173d2ac8e7dfab6b7070ac4010 Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Tue, 3 Dec 2024 15:25:29 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E4=B8=8D=E4=BC=9A=E5=88=A0=E9=99=A4=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E4=B8=AD=E5=88=86=E6=AE=B5=E5=85=B3=E8=81=94=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98=E7=9A=84=E7=BC=BA=E9=99=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1623302 --- .../dataset/serializers/document_serializers.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index ac2006a52..46f1b1700 100644 --- a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -56,6 +56,7 @@ from embedding.task.embedding import embedding_by_document, delete_embedding_by_ delete_embedding_by_document, update_embedding_dataset_id, delete_embedding_by_paragraph_ids, \ embedding_by_document_list from smartdoc.conf import PROJECT_DIR +from django.db import models parse_qa_handle_list = [XlsParseQAHandle(), CsvParseQAHandle(), XlsxParseQAHandle()] parse_table_handle_list = [CsvSplitHandle(), XlsSplitHandle(), XlsxSplitHandle()] @@ -442,6 +443,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): QuerySet(model=Paragraph).filter(document_id=document_id).delete() # 删除问题 QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete() + delete_problems_and_mappings([document_id]) # 删除向量库 delete_embedding_by_document(document_id) paragraphs = get_split_model('web.md').parse(result.content) @@ -660,7 +662,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): # 删除段落 QuerySet(model=Paragraph).filter(document_id=document_id).delete() # 删除问题 - 
def delete_problems_and_mappings(document_ids):
    """Delete the problem/paragraph mappings of the given documents and any
    problems that are left without a mapping afterwards.

    A problem is removed only when none of its remaining mappings point
    outside ``document_ids``; problems still referenced by other documents
    are kept.

    The candidate problems are discovered through the mappings, so callers
    must NOT delete the ``ProblemParagraphMapping`` rows of these documents
    before calling this function -- otherwise no candidates are found and
    no problem is removed.

    :param document_ids: iterable of document ids whose mappings are removed
    :return: None
    """
    mappings = ProblemParagraphMapping.objects.filter(document_id__in=document_ids)
    # Snapshot the ids first: the queryset is lazy and would yield nothing
    # once the mappings have been deleted.
    candidate_problem_ids = set(mappings.values_list('problem_id', flat=True))
    mappings.delete()
    if not candidate_problem_ids:
        return

    # Counting "exactly one mapping" misses problems linked to several
    # paragraphs that all live in the deleted documents. Instead, look at
    # what survives the mapping deletion and drop every candidate that is
    # no longer referenced.
    still_referenced = set(
        ProblemParagraphMapping.objects.filter(problem_id__in=candidate_problem_ids)
        .values_list('problem_id', flat=True)
    )
    orphaned_problem_ids = candidate_problem_ids - still_referenced
    if orphaned_problem_ids:
        Problem.objects.filter(id__in=list(orphaned_problem_ids)).delete()