fix: 修复删除文档不会删除文档中分段关联的问题的缺陷

--bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1623302
This commit is contained in:
wxg0103 2024-12-03 15:25:29 +08:00 committed by wxg
parent b6c65154c5
commit e5ead8ea20

View File

@ -56,6 +56,7 @@ from embedding.task.embedding import embedding_by_document, delete_embedding_by_
delete_embedding_by_document, update_embedding_dataset_id, delete_embedding_by_paragraph_ids, \
embedding_by_document_list
from smartdoc.conf import PROJECT_DIR
from django.db import models
parse_qa_handle_list = [XlsParseQAHandle(), CsvParseQAHandle(), XlsxParseQAHandle()]
parse_table_handle_list = [CsvSplitHandle(), XlsSplitHandle(), XlsxSplitHandle()]
@ -442,6 +443,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
QuerySet(model=Paragraph).filter(document_id=document_id).delete()
# 删除问题
QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete()
delete_problems_and_mappings([document_id])
# 删除向量库
delete_embedding_by_document(document_id)
paragraphs = get_split_model('web.md').parse(result.content)
@ -660,7 +662,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
# 删除段落
QuerySet(model=Paragraph).filter(document_id=document_id).delete()
# 删除问题
QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete()
delete_problems_and_mappings([document_id])
# 删除向量库
delete_embedding_by_document(document_id)
return True
@ -987,7 +989,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
document_id_list = instance.get("id_list")
QuerySet(Document).filter(id__in=document_id_list).delete()
QuerySet(Paragraph).filter(document_id__in=document_id_list).delete()
QuerySet(ProblemParagraphMapping).filter(document_id__in=document_id_list).delete()
delete_problems_and_mappings(document_id_list)
# 删除向量库
delete_embedding_by_document_list(document_id_list)
return True
@ -1086,3 +1088,14 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int):
if split_handle.support(file, get_buffer):
return split_handle.handle(file, pattern_list, with_filter, limit, get_buffer, save_image)
return default_split_handle.handle(file, pattern_list, with_filter, limit, get_buffer, save_image)
def delete_problems_and_mappings(document_ids):
problem_ids = ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id',
flat=True)
if problem_ids:
problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
'problem_id').annotate(count=models.Count('id'))
problem_ids_to_delete = [item['problem_id'] for item in problem_counts if item['count'] == 1]
Problem.objects.filter(id__in=problem_ids_to_delete).delete()
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()