mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
fix: 修复批量删除文档没有删除问题的缺陷
--bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1624544
This commit is contained in:
parent
d9a46b6ed2
commit
c54bfc7414
|
|
@ -18,7 +18,7 @@ import openpyxl
|
|||
from celery_once import AlreadyQueued
|
||||
from django.core import validators
|
||||
from django.db import transaction
|
||||
from django.db.models import QuerySet
|
||||
from django.db.models import QuerySet, Count
|
||||
from django.db.models.functions import Substr, Reverse
|
||||
from django.http import HttpResponse
|
||||
from drf_yasg import openapi
|
||||
|
|
@ -1091,11 +1091,17 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int):
|
|||
|
||||
|
||||
def delete_problems_and_mappings(document_ids):
|
||||
problem_ids = ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id',
|
||||
flat=True)
|
||||
# 获取所有需要删除的问题ID
|
||||
problem_ids = list(
|
||||
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id', flat=True))
|
||||
|
||||
if problem_ids:
|
||||
problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
|
||||
'problem_id').annotate(count=models.Count('id'))
|
||||
problem_ids_to_delete = [item['problem_id'] for item in problem_counts if item['count'] == 1]
|
||||
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()
|
||||
remaining_problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
|
||||
'problem_id').annotate(count=Count('problem_id'))
|
||||
|
||||
problem_ids_to_delete = [pid for pid in problem_ids if
|
||||
not any(pc['problem_id'] == pid for pc in remaining_problem_counts)]
|
||||
Problem.objects.filter(id__in=problem_ids_to_delete).delete()
|
||||
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()
|
||||
else:
|
||||
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from typing import Dict
|
|||
|
||||
from celery_once import AlreadyQueued
|
||||
from django.db import transaction
|
||||
from django.db.models import QuerySet
|
||||
from django.db.models import QuerySet, Count
|
||||
from drf_yasg import openapi
|
||||
from rest_framework import serializers
|
||||
|
||||
|
|
@ -291,7 +291,7 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
|
|||
self.is_valid(raise_exception=True)
|
||||
paragraph_id_list = instance.get("id_list")
|
||||
QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete()
|
||||
QuerySet(ProblemParagraphMapping).filter(paragraph_id__in=paragraph_id_list).delete()
|
||||
delete_problems_and_mappings(paragraph_id_list)
|
||||
update_document_char_length(self.data.get('document_id'))
|
||||
# 删除向量库
|
||||
delete_embedding_by_paragraph_ids(paragraph_id_list)
|
||||
|
|
@ -541,14 +541,7 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
|
|||
self.is_valid(raise_exception=True)
|
||||
paragraph_id = self.data.get('paragraph_id')
|
||||
Paragraph.objects.filter(id=paragraph_id).delete()
|
||||
|
||||
problem_id = ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).values_list('problem_id',
|
||||
flat=True).first()
|
||||
|
||||
if problem_id is not None:
|
||||
if ProblemParagraphMapping.objects.filter(problem_id=problem_id).count() == 1:
|
||||
Problem.objects.filter(id=problem_id).delete()
|
||||
ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).delete()
|
||||
delete_problems_and_mappings([paragraph_id])
|
||||
|
||||
update_document_char_length(self.data.get('document_id'))
|
||||
delete_embedding_by_paragraph(paragraph_id)
|
||||
|
|
@ -755,3 +748,19 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
|
|||
prompt)
|
||||
except AlreadyQueued as e:
|
||||
raise AppApiException(500, "任务正在执行中,请勿重复下发")
|
||||
|
||||
|
||||
def delete_problems_and_mappings(paragraph_ids):
|
||||
problem_ids = list(
|
||||
ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).values_list('problem_id', flat=True))
|
||||
|
||||
if problem_ids:
|
||||
ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).delete()
|
||||
remaining_problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
|
||||
'problem_id').annotate(count=Count('problem_id'))
|
||||
|
||||
problem_ids_to_delete = [pid for pid in problem_ids if
|
||||
not any(pc['problem_id'] == pid for pc in remaining_problem_counts)]
|
||||
Problem.objects.filter(id__in=problem_ids_to_delete).delete()
|
||||
else:
|
||||
ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).delete()
|
||||
|
|
|
|||
Loading…
Reference in New Issue