fix: 【知识库】整体同步,只删除了没有同步

This commit is contained in:
shaohuzhang1 2024-01-29 17:07:07 +08:00
parent 1254e5c5ff
commit 04f34d748e
3 changed files with 14 additions and 5 deletions

View File

@ -10,9 +10,18 @@ from concurrent.futures import ThreadPoolExecutor
# Pool used by the ``poxy`` decorator for background work (5 worker threads).
work_thread_pool = ThreadPoolExecutor(max_workers=5)
# Separate, smaller pool used by the ``embedding_poxy`` decorator (3 worker threads).
embedding_thread_pool = ThreadPoolExecutor(max_workers=3)
def poxy(poxy_function):
    """Decorator that runs ``poxy_function`` on ``work_thread_pool``.

    Calling the decorated function submits ``poxy_function`` to the pool and
    returns without waiting for it to finish.

    :param poxy_function: callable to execute on the pool.
    :return: wrapper that forwards its positional and keyword arguments to
        ``poxy_function`` through the pool and itself returns ``None``.
    """
    # Function-scope import: the file's import block is not visible from here.
    from functools import wraps

    @wraps(poxy_function)  # keep __name__/__doc__ of the decorated function
    def inner(*args, **kwargs):
        # The Future returned by submit() is intentionally dropped
        # (fire-and-forget), so an exception raised inside ``poxy_function``
        # is not propagated to the caller.
        work_thread_pool.submit(poxy_function, *args, **kwargs)

    return inner
def embedding_poxy(poxy_function):
    """Decorator that runs ``poxy_function`` on ``embedding_thread_pool``.

    Calling the decorated function submits ``poxy_function`` to the pool and
    returns without waiting for it to finish.

    :param poxy_function: callable to execute on the pool.
    :return: wrapper that forwards its positional and keyword arguments to
        ``poxy_function`` through the pool and itself returns ``None``.
    """
    # Function-scope import: the file's import block is not visible from here.
    from functools import wraps

    @wraps(poxy_function)  # keep __name__/__doc__ of the decorated function
    def inner(*args, **kwargs):
        # The Future returned by submit() is intentionally dropped
        # (fire-and-forget), so an exception raised inside ``poxy_function``
        # is not propagated to the caller.
        embedding_thread_pool.submit(poxy_function, *args, **kwargs)

    return inner

View File

@ -17,7 +17,7 @@ from django.db.models import QuerySet
from common.config.embedding_config import VectorStore, EmbeddingModel
from common.db.search import native_search, get_dynamics_model
from common.event.common import poxy
from common.event.common import poxy, embedding_poxy
from common.util.file_util import get_file_content
from common.util.fork import ForkManage, Fork
from common.util.lock import try_lock, un_lock
@ -65,7 +65,7 @@ class ListenerManagement:
VectorStore.get_embedding_vector().save(**args)
@staticmethod
@poxy
@embedding_poxy
def embedding_by_paragraph(paragraph_id):
"""
向量化段落 根据段落id
@ -93,7 +93,7 @@ class ListenerManagement:
max_kb.info(f'结束--->向量化段落:{paragraph_id}')
@staticmethod
@poxy
@embedding_poxy
def embedding_by_document(document_id):
"""
向量化文档
@ -123,7 +123,7 @@ class ListenerManagement:
max_kb.info(f"结束--->向量化文档:{document_id}")
@staticmethod
@poxy
@embedding_poxy
def embedding_by_dataset(dataset_id):
"""
向量化知识库

View File

@ -503,7 +503,7 @@ class DataSetSerializers(serializers.ModelSerializer):
document_name = child_link.tag.text if child_link.tag is not None and len(
child_link.tag.text.strip()) > 0 else child_link.url
paragraphs = get_split_model('web.md').parse(response.content)
first = QuerySet(Document).filter(meta__source_url=child_link.url).first()
first = QuerySet(Document).filter(meta__source_url=child_link.url, dataset=dataset).first()
if first is not None:
# 如果存在,使用文档同步
DocumentSerializers.Sync(data={'document_id': first.id}).sync()