From 219fe97c873674beafbc35fcdd4021b41c12c766 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Tue, 21 Oct 2025 21:18:24 +0800 Subject: [PATCH] feat: Add document_id_list parameter to query methods in vector classes --- .../search_dataset_step/impl/base_search_dataset_step.py | 2 +- .../impl/base_search_knowledge_node.py | 6 ++++-- apps/knowledge/vector/base_vector.py | 1 + apps/knowledge/vector/pg_vector.py | 3 +++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py b/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py index aed641f7b..ee6c2ff1d 100644 --- a/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py +++ b/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py @@ -67,7 +67,7 @@ class BaseSearchDatasetStep(ISearchDatasetStep): embedding_model = ModelManage.get_model(model_id, lambda _id: get_model(model)) embedding_value = embedding_model.embed_query(exec_problem_text) vector = VectorStore.get_embedding_vector() - embedding_list = vector.query(exec_problem_text, embedding_value, knowledge_id_list, exclude_document_id_list, + embedding_list = vector.query(exec_problem_text, embedding_value, knowledge_id_list, None, exclude_document_id_list, exclude_paragraph_id_list, True, top_n, similarity, SearchMode(search_mode)) if embedding_list is None: return [] diff --git a/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py b/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py index c1779d7e2..4b6e96f17 100644 --- a/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py +++ b/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py @@ -79,12 +79,14 @@ class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode): self.context['question'] = question self.context['show_knowledge'] = show_knowledge + document_id_list = None if search_scope_type == 'referencing': # 引用上一步知识库/文档 if search_scope_source == 'knowledge': # 知识库 knowledge_id_list = self.get_reference_content(search_scope_reference) else: # 文档 + document_id_list = self.get_reference_content(search_scope_reference) knowledge_id_list = QuerySet(Document).filter( - id__in=self.get_reference_content(search_scope_reference) + id__in=document_id_list ).values_list( 'knowledge_id', flat=True ).distinct() @@ -105,7 +107,7 @@ class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode): QuerySet(Document).filter( knowledge_id__in=knowledge_id_list, is_active=False)] - embedding_list = vector.query(question, embedding_value, knowledge_id_list, exclude_document_id_list, + embedding_list = vector.query(question, embedding_value, knowledge_id_list, document_id_list, exclude_document_id_list, exclude_paragraph_id_list, True, knowledge_setting.get('top_n'), knowledge_setting.get('similarity'), SearchMode(knowledge_setting.get('search_mode'))) diff --git a/apps/knowledge/vector/base_vector.py b/apps/knowledge/vector/base_vector.py index 07f68e4c8..92f250038 100644 --- a/apps/knowledge/vector/base_vector.py +++ b/apps/knowledge/vector/base_vector.py @@ -126,6 +126,7 @@ class BaseVectorStore(ABC): @abstractmethod def query(self, query_text: str, query_embedding: List[float], knowledge_id_list: list[str], + document_id_list: list[str] | None, exclude_document_id_list: list[str], exclude_paragraph_list: list[str], is_active: bool, top_n: int, similarity: float, search_mode: SearchMode): diff --git a/apps/knowledge/vector/pg_vector.py b/apps/knowledge/vector/pg_vector.py index f787cd83f..21e38cc3b 100644 --- a/apps/knowledge/vector/pg_vector.py +++ b/apps/knowledge/vector/pg_vector.py @@ -97,6 +97,7 @@ class PGVector(BaseVectorStore): return search_handle.handle(query_set, query_text, embedding_query, top_number, similarity, search_mode) def query(self, query_text: str, query_embedding: List[float], knowledge_id_list: list[str], + document_id_list: list[str], exclude_document_id_list: list[str], exclude_paragraph_list: list[str], is_active: bool, top_n: int, similarity: float, search_mode: SearchMode): @@ -104,6 +105,8 @@ class PGVector(BaseVectorStore): if knowledge_id_list is None or len(knowledge_id_list) == 0: return [] query_set = QuerySet(Embedding).filter(knowledge_id__in=knowledge_id_list, is_active=is_active) + if document_id_list is not None and len(document_id_list) > 0: + query_set = query_set.filter(document_id__in=document_id_list) if exclude_document_id_list is not None and len(exclude_document_id_list) > 0: query_set = query_set.exclude(document_id__in=exclude_document_id_list) if exclude_paragraph_list is not None and len(exclude_paragraph_list) > 0: