from typing import List


def sub_array(array: List, item_num=50) -> List[List]:
    """Split *array* into consecutive chunks of at most *item_num* items.

    Items keep their original order. Every chunk except possibly the last
    holds exactly ``item_num`` items; the last chunk holds the remainder.
    Each chunk is a new list, and *array* itself is only iterated, never
    modified.

    Args:
        array: The items to split. It is only iterated over, so any
            iterable works even though the annotation says ``List``.
        item_num: Maximum number of items per chunk. A value of zero or
            less produces one-element chunks, because a chunk counts as
            full as soon as it holds a single item.

    Returns:
        A list of chunks (lists). Empty input yields an empty list.
    """
    chunks: List[List] = []
    for element in array:
        # Open a new chunk on the first item and whenever the current chunk
        # has reached capacity. Deciding this *before* appending means no
        # separate "flush the remainder" step is needed after the loop and
        # an empty trailing chunk can never be produced.
        if not chunks or len(chunks[-1]) >= item_num:
            chunks.append([])
        chunks[-1].append(element)
    return chunks