From 4cda73ba572fd62d1dfe0880ad2eebb6cb586403 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Thu, 23 Oct 2025 17:43:28 +0800 Subject: [PATCH] refactor: optimize document matching logic for tag conditions --- .../impl/base_search_document_node.py | 67 ++++++++++++------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/apps/application/flow/step_node/search_document_node/impl/base_search_document_node.py b/apps/application/flow/step_node/search_document_node/impl/base_search_document_node.py index 980cc2fd1..52101937e 100644 --- a/apps/application/flow/step_node/search_document_node/impl/base_search_document_node.py +++ b/apps/application/flow/step_node/search_document_node/impl/base_search_document_node.py @@ -121,44 +121,63 @@ class BaseSearchDocumentNode(ISearchDocumentStepNode): compare_type = condition['compare'] # 构建查询条件 - if compare_type == 'contain': - q_filter = Q(tag__key=tag_key, tag__value__icontains=field_value) - elif compare_type == 'eq': - q_filter = Q(tag__key=tag_key, tag__value=field_value) - elif compare_type == 'not_contain': - q_filter = ~Q(tag__key=tag_key, tag__value__icontains=field_value) + if compare_type == 'not_contain': + # 反向查询:找出包含该标签的文档,然后排除 + exclude_docs = set(QuerySet(DocumentTag).filter( + document_id__in=matched_doc_ids, + tag__key=tag_key, + tag__value__icontains=field_value + ).values_list('document_id', flat=True).distinct()) + + matched_doc_ids = matched_doc_ids - exclude_docs else: - continue + if compare_type == 'contain': + q_filter = Q(tag__key=tag_key, tag__value__icontains=field_value) + elif compare_type == 'eq': + q_filter = Q(tag__key=tag_key, tag__value=field_value) + else: + continue - # 单次查询获取符合条件的文档 - tag_docs = set(QuerySet(DocumentTag).filter( - document_id__in=matched_doc_ids - ).filter(q_filter).values_list('document_id', flat=True).distinct()) + # 单次查询获取符合条件的文档 + tag_docs = set(QuerySet(DocumentTag).filter( + document_id__in=matched_doc_ids + ).filter(q_filter).values_list('document_id', flat=True).distinct()) - matched_doc_ids = matched_doc_ids.intersection(tag_docs) + matched_doc_ids = matched_doc_ids.intersection(tag_docs) return matched_doc_ids else: - # OR逻辑:使用一次查询完成 - q_objects = Q() + # OR逻辑 + matched_docs = set() for condition in search_condition_list: tag_key = condition['key'] field_value = self.workflow_manage.generate_prompt(condition['value']) compare_type = condition['compare'] - if compare_type == 'contain': - q_objects |= Q(tag__key=tag_key, tag__value__icontains=field_value) - elif compare_type == 'eq': - q_objects |= Q(tag__key=tag_key, tag__value=field_value) - elif compare_type == 'not_contain': - q_objects |= ~Q(tag__key=tag_key, tag__value__icontains=field_value) + if compare_type == 'not_contain': + # 反向查询:找出包含该标签的文档,然后用全集减去 + exclude_docs = set(QuerySet(DocumentTag).filter( + document_id__in=document_id_list, + tag__key=tag_key, + tag__value__icontains=field_value + ).values_list('document_id', flat=True).distinct()) - # 一次查询获取所有匹配的文档 - matched_docs = set(QuerySet(DocumentTag).filter( - document_id__in=document_id_list - ).filter(q_objects).values_list('document_id', flat=True).distinct()) + matched_docs = matched_docs.union(set(document_id_list) - exclude_docs) + else: + if compare_type == 'contain': + q_filter = Q(tag__key=tag_key, tag__value__icontains=field_value) + elif compare_type == 'eq': + q_filter = Q(tag__key=tag_key, tag__value=field_value) + else: + continue + + docs = set(QuerySet(DocumentTag).filter( + document_id__in=document_id_list + ).filter(q_filter).values_list('document_id', flat=True).distinct()) + + matched_docs = matched_docs.union(docs) return matched_docs