From 04f34d748eb0771f075b7da94c2e775c37f49083 Mon Sep 17 00:00:00 2001
From: shaohuzhang1
Date: Mon, 29 Jan 2024 17:07:07 +0800
Subject: [PATCH] fix: [Knowledge base] whole-dataset sync only deleted
 documents and did not sync them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/common/event/common.py                     | 9 +++++++++
 apps/common/event/listener_manage.py            | 8 ++++----
 apps/dataset/serializers/dataset_serializers.py | 2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/apps/common/event/common.py b/apps/common/event/common.py
index bd553f07f..e35123758 100644
--- a/apps/common/event/common.py
+++ b/apps/common/event/common.py
@@ -10,9 +10,18 @@ from concurrent.futures import ThreadPoolExecutor
 
 work_thread_pool = ThreadPoolExecutor(5)
 
+embedding_thread_pool = ThreadPoolExecutor(3)
+
 
 def poxy(poxy_function):
     def inner(args):
         work_thread_pool.submit(poxy_function, args)
 
     return inner
+
+
+def embedding_poxy(poxy_function):
+    def inner(args):
+        embedding_thread_pool.submit(poxy_function, args)
+
+    return inner
diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py
index 158f1cd67..9fc25e3b8 100644
--- a/apps/common/event/listener_manage.py
+++ b/apps/common/event/listener_manage.py
@@ -17,7 +17,7 @@
 from django.db.models import QuerySet
 from common.config.embedding_config import VectorStore, EmbeddingModel
 from common.db.search import native_search, get_dynamics_model
-from common.event.common import poxy
+from common.event.common import poxy, embedding_poxy
 from common.util.file_util import get_file_content
 from common.util.fork import ForkManage, Fork
 from common.util.lock import try_lock, un_lock
@@ -65,7 +65,7 @@ class ListenerManagement:
         VectorStore.get_embedding_vector().save(**args)
 
     @staticmethod
-    @poxy
+    @embedding_poxy
     def embedding_by_paragraph(paragraph_id):
         """
         Vectorize a paragraph by paragraph id
@@ -93,7 +93,7 @@ class ListenerManagement:
         max_kb.info(f'Finished ---> vectorize paragraph: {paragraph_id}')
 
     @staticmethod
-    @poxy
+    @embedding_poxy
     def embedding_by_document(document_id):
         """
         Vectorize a document
@@ -123,7 +123,7 @@ class ListenerManagement:
         max_kb.info(f"Finished ---> vectorize document: {document_id}")
 
     @staticmethod
-    @poxy
+    @embedding_poxy
     def embedding_by_dataset(dataset_id):
         """
         Vectorize a knowledge base (dataset)
diff --git a/apps/dataset/serializers/dataset_serializers.py b/apps/dataset/serializers/dataset_serializers.py
index 573684f69..55d709ae5 100644
--- a/apps/dataset/serializers/dataset_serializers.py
+++ b/apps/dataset/serializers/dataset_serializers.py
@@ -503,7 +503,7 @@ class DataSetSerializers(serializers.ModelSerializer):
                     document_name = child_link.tag.text if child_link.tag is not None and len(
                         child_link.tag.text.strip()) > 0 else child_link.url
                     paragraphs = get_split_model('web.md').parse(response.content)
-                    first = QuerySet(Document).filter(meta__source_url=child_link.url).first()
+                    first = QuerySet(Document).filter(meta__source_url=child_link.url, dataset=dataset).first()
                     if first is not None:
                         # If the document already exists, sync it
                         DocumentSerializers.Sync(data={'document_id': first.id}).sync()
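
Note: the new embedding_poxy decorator mirrors the existing poxy helper but submits work to a dedicated three-worker pool, so embedding jobs no longer queue behind other background tasks in work_thread_pool. Below is a minimal standalone sketch of that pattern, assuming only the code shown in the patch; the example task and its print body are illustrative placeholders, not code from the repository.

from concurrent.futures import ThreadPoolExecutor

# Dedicated pool so long-running embedding jobs do not block other background work.
embedding_thread_pool = ThreadPoolExecutor(3)


def embedding_poxy(poxy_function):
    # Decorator: run the wrapped single-argument function asynchronously on the embedding pool.
    def inner(args):
        embedding_thread_pool.submit(poxy_function, args)

    return inner


@embedding_poxy
def embedding_by_paragraph(paragraph_id):
    # Illustrative placeholder body; in MaxKB this would load the paragraph and save its vectors.
    print(f'vectorizing paragraph {paragraph_id}')


if __name__ == '__main__':
    embedding_by_paragraph('42')                # returns immediately; work runs on the pool
    embedding_thread_pool.shutdown(wait=True)   # only needed in this standalone demo

The serializer change is independent of the thread pool: adding dataset=dataset to the QuerySet(Document).filter(...) lookup scopes the source-URL match to the knowledge base being synced, so a whole-dataset sync updates the document belonging to that dataset instead of matching a document with the same source URL from another dataset.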