From c54bfc74148e4ce542271b6f171049401d878088 Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Wed, 4 Dec 2024 11:03:55 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=89=B9=E9=87=8F?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E6=A1=A3=E6=B2=A1=E6=9C=89=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E9=97=AE=E9=A2=98=E7=9A=84=E7=BC=BA=E9=99=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1624544 --- .../serializers/document_serializers.py | 20 ++++++++----- .../serializers/paragraph_serializers.py | 29 ++++++++++++------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index 46f1b1700..3688a37bb 100644 --- a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -18,7 +18,7 @@ import openpyxl from celery_once import AlreadyQueued from django.core import validators from django.db import transaction -from django.db.models import QuerySet +from django.db.models import QuerySet, Count from django.db.models.functions import Substr, Reverse from django.http import HttpResponse from drf_yasg import openapi @@ -1091,11 +1091,17 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int): def delete_problems_and_mappings(document_ids): - problem_ids = ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id', - flat=True) + # 获取所有需要删除的问题ID + problem_ids = list( + ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id', flat=True)) + if problem_ids: - problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values( - 'problem_id').annotate(count=models.Count('id')) - problem_ids_to_delete = [item['problem_id'] for item in problem_counts if item['count'] == 1] + ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete() + remaining_problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values( + 'problem_id').annotate(count=Count('problem_id')) + + problem_ids_to_delete = [pid for pid in problem_ids if + not any(pc['problem_id'] == pid for pc in remaining_problem_counts)] Problem.objects.filter(id__in=problem_ids_to_delete).delete() - ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete() + else: + ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete() diff --git a/apps/dataset/serializers/paragraph_serializers.py b/apps/dataset/serializers/paragraph_serializers.py index a115e544b..7bc73e8a8 100644 --- a/apps/dataset/serializers/paragraph_serializers.py +++ b/apps/dataset/serializers/paragraph_serializers.py @@ -11,7 +11,7 @@ from typing import Dict from celery_once import AlreadyQueued from django.db import transaction -from django.db.models import QuerySet +from django.db.models import QuerySet, Count from drf_yasg import openapi from rest_framework import serializers @@ -291,7 +291,7 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer): self.is_valid(raise_exception=True) paragraph_id_list = instance.get("id_list") QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete() - QuerySet(ProblemParagraphMapping).filter(paragraph_id__in=paragraph_id_list).delete() + delete_problems_and_mappings(paragraph_id_list) update_document_char_length(self.data.get('document_id')) # 删除向量库 delete_embedding_by_paragraph_ids(paragraph_id_list) @@ -541,14 +541,7 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer): self.is_valid(raise_exception=True) paragraph_id = self.data.get('paragraph_id') Paragraph.objects.filter(id=paragraph_id).delete() - - problem_id = ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).values_list('problem_id', - flat=True).first() - - if problem_id is not None: - if ProblemParagraphMapping.objects.filter(problem_id=problem_id).count() == 1: - Problem.objects.filter(id=problem_id).delete() - ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).delete() + delete_problems_and_mappings([paragraph_id]) update_document_char_length(self.data.get('document_id')) delete_embedding_by_paragraph(paragraph_id) @@ -755,3 +748,19 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer): prompt) except AlreadyQueued as e: raise AppApiException(500, "任务正在执行中,请勿重复下发") + + +def delete_problems_and_mappings(paragraph_ids): + problem_ids = list( + ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).values_list('problem_id', flat=True)) + + if problem_ids: + ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).delete() + remaining_problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values( + 'problem_id').annotate(count=Count('problem_id')) + + problem_ids_to_delete = [pid for pid in problem_ids if + not any(pc['problem_id'] == pid for pc in remaining_problem_counts)] + Problem.objects.filter(id__in=problem_ids_to_delete).delete() + else: + ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).delete()