def sub_array(array: List, item_num: int = 50) -> List[List]:
    """Split *array* into consecutive chunks of at most *item_num* items.

    Order is preserved; every chunk except possibly the last holds exactly
    ``item_num`` items, and no empty chunk is ever produced.

    :param array: items to split; any iterable is accepted and is
        materialised once before chunking.
    :param item_num: maximum number of items per chunk, must be >= 1.
    :return: list of chunks (each a new list); ``[]`` for empty input.
    :raises ValueError: if ``item_num`` is smaller than 1.
    """
    # A non-positive size used to slip through and quietly yield one-element
    # chunks, which defeats the purpose of a batch limit; reject it loudly.
    if item_num < 1:
        raise ValueError(f"item_num must be >= 1, got {item_num!r}")
    # Materialise once so generators and other non-sliceable iterables work.
    items = list(array)
    return [
        items[start:start + item_num]
        for start in range(0, len(items), item_num)
    ]