fix: 批量删除文档,未删除关联段落信息, 添加关联问题报错

This commit is contained in:
shaohuzhang1 2024-01-23 15:52:15 +08:00
parent a98b537beb
commit 67e6138066
5 changed files with 23 additions and 7 deletions

View File

@ -50,6 +50,7 @@ class ListenerManagement:
# Named signals, one per embedding operation. Elsewhere in this class handlers
# are attached with `.connect(...)`, and callers trigger them with `.send(...)`.
# NOTE(review): `signal` is presumably blinker's factory - confirm against the imports.
embedding_by_dataset_signal = signal("embedding_by_dataset")
embedding_by_document_signal = signal("embedding_by_document")
delete_embedding_by_document_signal = signal("delete_embedding_by_document")
# Batch variant: sent with a list of document ids.
delete_embedding_by_document_list_signal = signal("delete_embedding_by_document_list")
delete_embedding_by_dataset_signal = signal("delete_embedding_by_dataset")
delete_embedding_by_paragraph_signal = signal("delete_embedding_by_paragraph")
delete_embedding_by_source_signal = signal("delete_embedding_by_source")
@ -144,6 +145,10 @@ class ListenerManagement:
def delete_embedding_by_document(document_id):
    """
    Remove the stored vectors that belong to a single document.

    :param document_id: id of the document whose vectors are deleted
    """
    vector_store = VectorStore.get_embedding_vector()
    vector_store.delete_by_document_id(document_id)
@staticmethod
def delete_embedding_by_document_list(document_id_list: List[str]):
    """
    Remove the stored vectors for every document in the given list.

    :param document_id_list: ids of the documents whose vectors are deleted
    """
    vector_store = VectorStore.get_embedding_vector()
    # The store method really is spelled "delete_bu_..."; keep the name in sync with it.
    vector_store.delete_bu_document_id_list(document_id_list)
@staticmethod
def delete_embedding_by_dataset(dataset_id):
VectorStore.get_embedding_vector().delete_by_dataset_id(dataset_id)
@ -201,6 +206,8 @@ class ListenerManagement:
self.embedding_by_document)
# 删除 向量 根据文档
ListenerManagement.delete_embedding_by_document_signal.connect(self.delete_embedding_by_document)
# 删除 向量 根据文档id列表
ListenerManagement.delete_embedding_by_document_list_signal.connect(self.delete_embedding_by_document_list)
# 删除 向量 根据知识库id
ListenerManagement.delete_embedding_by_dataset_signal.connect(self.delete_embedding_by_dataset)
# 删除向量 根据段落id

View File

@ -547,7 +547,12 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
if with_valid:
BatchSerializer(data=instance).is_valid(model=Document, raise_exception=True)
self.is_valid(raise_exception=True)
QuerySet(Document).filter(id__in=instance.get('id_list')).delete()
document_id_list = instance.get("id_list")
QuerySet(Document).filter(id__in=document_id_list).delete()
QuerySet(Paragraph).filter(document_id__in=document_id_list).delete()
QuerySet(Problem).filter(document_id__in=document_id_list).delete()
# 删除向量库
ListenerManagement.delete_embedding_by_document_list_signal.send(document_id_list)
return True

View File

@ -9,6 +9,7 @@
import uuid
from typing import Dict
from django.db import transaction
from django.db.models import QuerySet
from drf_yasg import openapi
from rest_framework import serializers
@ -61,6 +62,7 @@ class ProblemSerializers(ApiMixin, serializers.Serializer):
dataset_id=self.data.get('dataset_id')).exists():
raise AppApiException(500, "段落id不正确")
@transaction.atomic
def save(self, instance: Dict, with_valid=True, with_embedding=True):
if with_valid:
self.is_valid()

View File

@ -51,8 +51,6 @@ class BaseVectorStore(ABC):
def save(self, text, source_type: SourceType, dataset_id: str, document_id: str, paragraph_id: str, source_id: str,
is_active: bool,
star_num: int,
trample_num: int,
embedding=None):
"""
插入向量数据
@ -64,16 +62,13 @@ class BaseVectorStore(ABC):
:param is_active: 是否禁用
:param embedding: 向量化处理器
:param paragraph_id 段落id
:param star_num 点赞数量
:param trample_num 点踩数量
:return: bool
"""
if embedding is None:
embedding = EmbeddingModel.get_embedding_model()
self.save_pre_handler()
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
trample_num, embedding)
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, embedding)
def batch_save(self, data_list: List[Dict], embedding=None):
# 获取锁
@ -143,6 +138,10 @@ class BaseVectorStore(ABC):
def delete_by_document_id(self, document_id: str):
    """
    Hook for deleting the vectors of one document.

    The base implementation does nothing; the concrete vector store supplies
    the actual deletion.

    :param document_id: id of the document
    """
@abstractmethod
def delete_bu_document_id_list(self, document_id_list: List[str]):
    """
    Abstract hook for deleting the vectors of several documents at once.

    The base implementation does nothing; subclasses must override it.

    :param document_id_list: ids of the documents
    """
@abstractmethod
def delete_by_source_id(self, source_id: str, source_type: str):
    """
    Abstract hook for deleting the vectors tied to one source record.

    The base implementation does nothing; subclasses must override it.

    :param source_id: id of the source record
    :param source_type: type of the source record
    """

View File

@ -107,6 +107,9 @@ class PGVector(BaseVectorStore):
QuerySet(Embedding).filter(document_id=document_id).delete()
return True
def delete_bu_document_id_list(self, document_id_list: List[str]):
    """
    Delete every Embedding row whose document id is in ``document_id_list``.

    :param document_id_list: ids of the documents whose vectors are removed
    :return: bool, always True, like the other delete_by_* methods of this class
    """
    # QuerySet.delete() returns a (row count, per-model dict) tuple; drop it so
    # this method has the same return contract as its sibling delete methods.
    QuerySet(Embedding).filter(document_id__in=document_id_list).delete()
    return True
def delete_by_source_id(self, source_id: str, source_type: str):
    """
    Delete the Embedding rows that match both the source id and the source type.

    :param source_id: id of the source record
    :param source_type: type of the source record
    :return: bool, always True
    """
    matching_rows = QuerySet(Embedding).filter(source_id=source_id, source_type=source_type)
    matching_rows.delete()
    return True