mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
feat: add create and drop knowledge index functions for improved database management
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
This commit is contained in:
parent
fc4a7df669
commit
6ce1eb7116
|
|
@ -26,6 +26,7 @@ from common.utils.logger import maxkb_logger
|
|||
from common.utils.page_utils import page_desc
|
||||
from knowledge.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State, SourceType, \
|
||||
SearchMode
|
||||
from knowledge.serializers.common import create_knowledge_index
|
||||
from maxkb.conf import (PROJECT_DIR)
|
||||
|
||||
lock = threading.Lock()
|
||||
|
|
@ -290,6 +291,8 @@ class ListenerManagement:
|
|||
ListenerManagement.get_aggregation_document_status(
|
||||
document_id)),
|
||||
is_the_task_interrupted)
|
||||
# 检查是否存在索引
|
||||
create_knowledge_index(document_id=document_id)
|
||||
except Exception as e:
|
||||
maxkb_logger.error(_('Vectorized document: {document_id} error {error} {traceback}').format(
|
||||
document_id=document_id, error=str(e), traceback=traceback.format_exc()))
|
||||
|
|
|
|||
|
|
@ -18,10 +18,12 @@ from rest_framework import serializers
|
|||
|
||||
from common.config.embedding_config import ModelManage
|
||||
from common.db.search import native_search
|
||||
from common.db.sql_execute import update_execute
|
||||
from common.db.sql_execute import sql_execute, update_execute
|
||||
from common.exception.app_exception import AppApiException
|
||||
from common.utils.common import get_file_content
|
||||
from common.utils.fork import Fork
|
||||
from common.utils.logger import maxkb_logger
|
||||
from knowledge.models import Document
|
||||
from knowledge.models import Paragraph, Problem, ProblemParagraphMapping, Knowledge, File
|
||||
from maxkb.conf import PROJECT_DIR
|
||||
from models_provider.tools import get_model
|
||||
|
|
@ -220,3 +222,44 @@ def get_knowledge_operation_object(knowledge_id: str):
|
|||
"update_time": knowledge_model.update_time
|
||||
}
|
||||
return {}
|
||||
|
||||
|
||||
def create_knowledge_index(knowledge_id=None, document_id=None):
    """Create the per-knowledge HNSW index on the ``embedding`` table if missing.

    Exactly one of the two ids is required; when only ``document_id`` is
    given, the owning knowledge id is resolved from the Document row.
    The index covers only rows of that knowledge (partial index) and is
    skipped entirely when the knowledge has no embeddings yet, because the
    vector dimension cannot be determined.

    :param knowledge_id: id of the knowledge base (takes precedence).
    :param document_id: id of a document belonging to the knowledge base.
    :raises AppApiException: when neither id is provided, the document does
        not exist, or the resolved id is not a safe SQL identifier fragment.
    """
    if knowledge_id is None and document_id is None:
        raise AppApiException(500, _('Knowledge ID or Document ID must be provided'))

    if knowledge_id is not None:
        k_id = knowledge_id
    else:
        document = QuerySet(Document).filter(id=document_id).first()
        # Original code raised AttributeError on a missing document; fail
        # with an explicit API error instead.
        if document is None:
            raise AppApiException(500, _('Document not found'))
        k_id = document.knowledge_id

    # k_id is spliced into DDL below (PostgreSQL cannot bind identifiers as
    # parameters), so reject anything that is not a plain UUID-like token.
    if not str(k_id).replace('-', '').isalnum():
        raise AppApiException(500, _('Invalid knowledge ID'))

    index_name = f'embedding_hnsw_idx_{k_id}'
    # Value positions use bound parameters instead of f-string splicing.
    # NOTE(review): assumes sql_execute forwards the params list to the
    # driver (the original always passed []) — verify against common.db.
    index = sql_execute(
        "SELECT indexname, indexdef FROM pg_indexes WHERE tablename = 'embedding' AND indexname = %s",
        [index_name])
    if not index:
        result = sql_execute(
            'SELECT vector_dims(embedding) AS dims FROM embedding WHERE knowledge_id = %s LIMIT 1',
            [k_id])
        if len(result) == 0:
            # No embeddings yet -> dimension unknown, nothing to index.
            return
        # Coerce to int so a non-numeric value cannot reach the DDL string.
        dims = int(result[0]['dims'])
        update_execute(
            f'CREATE INDEX "{index_name}" ON embedding USING hnsw '
            f"((embedding::vector({dims})) vector_l2_ops) WHERE knowledge_id = '{k_id}'",
            [])
        maxkb_logger.info(f'Created index for knowledge ID: {k_id}')
|
||||
|
||||
|
||||
def drop_knowledge_index(knowledge_id=None, document_id=None):
    """Drop the per-knowledge HNSW index on the ``embedding`` table if present.

    Exactly one of the two ids is required; when only ``document_id`` is
    given, the owning knowledge id is resolved from the Document row.
    A missing index is not an error — the function simply returns.

    :param knowledge_id: id of the knowledge base (takes precedence).
    :param document_id: id of a document belonging to the knowledge base.
    :raises AppApiException: when neither id is provided, the document does
        not exist, or the resolved id is not a safe SQL identifier fragment.
    """
    if knowledge_id is None and document_id is None:
        raise AppApiException(500, _('Knowledge ID or Document ID must be provided'))

    if knowledge_id is not None:
        k_id = knowledge_id
    else:
        document = QuerySet(Document).filter(id=document_id).first()
        # Original code raised AttributeError on a missing document; fail
        # with an explicit API error instead.
        if document is None:
            raise AppApiException(500, _('Document not found'))
        k_id = document.knowledge_id

    # k_id is spliced into DROP INDEX below (PostgreSQL cannot bind
    # identifiers as parameters), so reject anything that is not a plain
    # UUID-like token.
    if not str(k_id).replace('-', '').isalnum():
        raise AppApiException(500, _('Invalid knowledge ID'))

    index_name = f'embedding_hnsw_idx_{k_id}'
    # Existence check uses a bound parameter instead of f-string splicing.
    # NOTE(review): assumes sql_execute forwards the params list to the
    # driver (the original always passed []) — verify against common.db.
    index = sql_execute(
        "SELECT indexname, indexdef FROM pg_indexes WHERE tablename = 'embedding' AND indexname = %s",
        [index_name])
    if index:
        update_execute(f'DROP INDEX "{index_name}"', [])
        maxkb_logger.info(f'Dropped index for knowledge ID: {k_id}')
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ from common.utils.logger import maxkb_logger
|
|||
from common.utils.split_model import get_split_model
|
||||
from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \
|
||||
ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder, File
|
||||
from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
|
||||
from knowledge.serializers.common import ProblemParagraphManage, drop_knowledge_index, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
|
||||
GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph, write_image, zip_dir
|
||||
from knowledge.serializers.document import DocumentSerializers
|
||||
from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
|
||||
|
|
@ -418,6 +418,7 @@ class KnowledgeSerializer(serializers.Serializer):
|
|||
QuerySet(Problem).filter(knowledge=knowledge).delete()
|
||||
QuerySet(WorkspaceUserResourcePermission).filter(target=knowledge.id).delete()
|
||||
QuerySet(ApplicationKnowledgeMapping).filter(knowledge_id=knowledge.id).delete()
|
||||
drop_knowledge_index(knowledge_id=knowledge.id)
|
||||
knowledge.delete()
|
||||
File.objects.filter(
|
||||
source_id=knowledge.id,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from common.event import ListenerManagement, UpdateProblemArgs, UpdateEmbeddingK
|
|||
UpdateEmbeddingDocumentIdArgs
|
||||
from common.utils.logger import maxkb_logger
|
||||
from knowledge.models import Document, TaskType, State
|
||||
from knowledge.serializers.common import drop_knowledge_index
|
||||
from models_provider.tools import get_model
|
||||
from models_provider.models import Model
|
||||
from ops import celery_app
|
||||
|
|
@ -102,6 +103,7 @@ def embedding_by_knowledge(knowledge_id, model_id):
|
|||
maxkb_logger.info(_('Start--->Vectorized knowledge: {knowledge_id}').format(knowledge_id=knowledge_id))
|
||||
try:
|
||||
ListenerManagement.delete_embedding_by_knowledge(knowledge_id)
|
||||
drop_knowledge_index(knowledge_id=knowledge_id)
|
||||
document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
|
||||
maxkb_logger.info(_('Knowledge documentation: {document_names}').format(
|
||||
document_names=", ".join([d.name for d in document_list])))
|
||||
|
|
|
|||
Loading…
Reference in New Issue