From b52c972ac006a3aa6742827844a3527d1c90fecf Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Mon, 7 Jul 2025 21:42:17 +0800 Subject: [PATCH] feat: Dialogue displays knowledge sources (#3501) --- .../chat_pipeline/I_base_chat_pipeline.py | 25 +++++--- .../step/chat_step/impl/base_chat_step.py | 5 +- .../impl/base_search_dataset_step.py | 3 +- .../reranker_node/i_reranker_node.py | 4 +- .../reranker_node/impl/base_reranker_node.py | 12 ++-- .../i_search_knowledge_node.py | 5 +- .../impl/base_search_knowledge_node.py | 4 +- .../serializers/application_chat_record.py | 60 ++++++++++++++----- apps/application/serializers/common.py | 17 +++++- ...st_knowledge_paragraph_by_paragraph_id.sql | 1 + apps/chat/serializers/chat_record.py | 10 +++- .../lang/zh-CN/views/application-workflow.ts | 4 ++ ui/src/workflow/nodes/reranker-node/index.vue | 9 +++ .../nodes/search-knowledge-node/index.vue | 15 +++++ 14 files changed, 135 insertions(+), 39 deletions(-) diff --git a/apps/application/chat_pipeline/I_base_chat_pipeline.py b/apps/application/chat_pipeline/I_base_chat_pipeline.py index 8ef4896f3..160a4097f 100644 --- a/apps/application/chat_pipeline/I_base_chat_pipeline.py +++ b/apps/application/chat_pipeline/I_base_chat_pipeline.py @@ -18,8 +18,9 @@ from knowledge.models import Paragraph class ParagraphPipelineModel: def __init__(self, _id: str, document_id: str, knowledge_id: str, content: str, title: str, status: str, - is_active: bool, comprehensive_score: float, similarity: float, dataset_name: str, document_name: str, - hit_handling_method: str, directly_return_similarity: float, meta: dict = None): + is_active: bool, comprehensive_score: float, similarity: float, knowledge_name: str, + document_name: str, + hit_handling_method: str, directly_return_similarity: float, knowledge_type, meta: dict = None): self.id = _id self.document_id = document_id self.knowledge_id = knowledge_id @@ -29,11 +30,12 @@ class ParagraphPipelineModel: self.is_active = is_active self.comprehensive_score = comprehensive_score self.similarity = similarity - self.dataset_name = dataset_name + self.knowledge_name = knowledge_name self.document_name = document_name self.hit_handling_method = hit_handling_method self.directly_return_similarity = directly_return_similarity self.meta = meta + self.knowledge_type = knowledge_type def to_dict(self): return { @@ -46,8 +48,9 @@ class ParagraphPipelineModel: 'is_active': self.is_active, 'comprehensive_score': self.comprehensive_score, 'similarity': self.similarity, - 'dataset_name': self.dataset_name, + 'knowledge_name': self.knowledge_name, 'document_name': self.document_name, + 'knowledge_type': self.knowledge_type, 'meta': self.meta, } @@ -57,7 +60,8 @@ class ParagraphPipelineModel: self.paragraph = {} self.comprehensive_score = None self.document_name = None - self.dataset_name = None + self.knowledge_name = None + self.knowledge_type = None self.hit_handling_method = None self.directly_return_similarity = 0.9 self.meta = {} @@ -76,8 +80,12 @@ class ParagraphPipelineModel: self.paragraph = paragraph return self - def add_dataset_name(self, dataset_name): - self.dataset_name = dataset_name + def add_knowledge_name(self, knowledge_name): + self.knowledge_name = knowledge_name + return self + + def add_knowledge_type(self, knowledge_type): + self.knowledge_type = knowledge_type return self def add_document_name(self, document_name): @@ -110,8 +118,9 @@ class ParagraphPipelineModel: self.paragraph.get('content'), self.paragraph.get('title'), self.paragraph.get('status'), self.paragraph.get('is_active'), - self.comprehensive_score, self.similarity, self.dataset_name, + self.comprehensive_score, self.similarity, self.knowledge_name, self.document_name, self.hit_handling_method, self.directly_return_similarity, + self.knowledge_type, self.meta) diff --git a/apps/application/chat_pipeline/step/chat_step/impl/base_chat_step.py b/apps/application/chat_pipeline/step/chat_step/impl/base_chat_step.py index 135c314d3..295680713 100644 --- a/apps/application/chat_pipeline/step/chat_step/impl/base_chat_step.py +++ b/apps/application/chat_pipeline/step/chat_step/impl/base_chat_step.py @@ -18,7 +18,7 @@ from django.utils.translation import gettext as _ from langchain.chat_models.base import BaseChatModel from langchain.schema import BaseMessage from langchain.schema.messages import HumanMessage, AIMessage -from langchain_core.messages import AIMessageChunk +from langchain_core.messages import AIMessageChunk, SystemMessage from rest_framework import status from application.chat_pipeline.I_base_chat_pipeline import ParagraphPipelineModel @@ -196,7 +196,8 @@ class BaseChatStep(IChatStep): @staticmethod def reset_message_list(message_list: List[BaseMessage], answer_text): - result = [{'role': 'user' if isinstance(message, HumanMessage) else 'ai', 'content': message.content} for + result = [{'role': 'user' if isinstance(message, HumanMessage) else ( + 'system' if isinstance(message, SystemMessage) else 'ai'), 'content': message.content} for message in message_list] diff --git a/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py b/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py index 6adb2b4c4..2941d6537 100644 --- a/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py +++ b/apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py @@ -79,7 +79,8 @@ class BaseSearchDatasetStep(ISearchDatasetStep): .add_paragraph(paragraph) .add_similarity(find_embedding.get('similarity')) .add_comprehensive_score(find_embedding.get('comprehensive_score')) - .add_dataset_name(paragraph.get('dataset_name')) + .add_knowledge_name(paragraph.get('knowledge_name')) + .add_knowledge_type(paragraph.get('knowledge_type')) .add_document_name(paragraph.get('document_name')) .add_hit_handling_method(paragraph.get('hit_handling_method')) .add_directly_return_similarity(paragraph.get('directly_return_similarity')) diff --git a/apps/application/flow/step_node/reranker_node/i_reranker_node.py b/apps/application/flow/step_node/reranker_node/i_reranker_node.py index f2e87a47e..d0164393d 100644 --- a/apps/application/flow/step_node/reranker_node/i_reranker_node.py +++ b/apps/application/flow/step_node/reranker_node/i_reranker_node.py @@ -32,6 +32,8 @@ class RerankerStepNodeSerializer(serializers.Serializer): question_reference_address = serializers.ListField(required=True) reranker_model_id = serializers.UUIDField(required=True) reranker_reference_list = serializers.ListField(required=True, child=serializers.ListField(required=True)) + show_knowledge = serializers.BooleanField(required=True, + label=_("The results are displayed in the knowledge sources")) def is_valid(self, *, raise_exception=False): super().is_valid(raise_exception=True) @@ -55,6 +57,6 @@ class IRerankerNode(INode): reranker_list=reranker_list) - def execute(self, question, reranker_setting, reranker_list, reranker_model_id, + def execute(self, question, reranker_setting, reranker_list, reranker_model_id,show_knowledge, **kwargs) -> NodeResult: pass diff --git a/apps/application/flow/step_node/reranker_node/impl/base_reranker_node.py b/apps/application/flow/step_node/reranker_node/impl/base_reranker_node.py index 0639f21cf..b3f17c364 100644 --- a/apps/application/flow/step_node/reranker_node/impl/base_reranker_node.py +++ b/apps/application/flow/step_node/reranker_node/impl/base_reranker_node.py @@ -24,11 +24,9 @@ def merge_reranker_list(reranker_list, result=None): elif isinstance(document, dict): content = document.get('title', '') + document.get('content', '') title = document.get("title") - dataset_name = document.get("dataset_name") - document_name = document.get('document_name') result.append( Document(page_content=str(document) if len(content) == 0 else content, - metadata={'title': title, 'dataset_name': dataset_name, 'document_name': document_name})) + metadata={'title': title, **document})) else: result.append(Document(page_content=str(document), metadata={})) return result @@ -71,8 +69,9 @@ class BaseRerankerNode(IRerankerNode): self.context['result_list'] = details.get('result_list') self.context['result'] = details.get('result') - def execute(self, question, reranker_setting, reranker_list, reranker_model_id, + def execute(self, question, reranker_setting, reranker_list, reranker_model_id, show_knowledge, **kwargs) -> NodeResult: + self.context['show_knowledge'] = show_knowledge documents = merge_reranker_list(reranker_list) top_n = reranker_setting.get('top_n', 3) self.context['document_list'] = [{'page_content': document.page_content, 'metadata': document.metadata} for @@ -80,8 +79,8 @@ class BaseRerankerNode(IRerankerNode): self.context['question'] = question workspace_id = self.workflow_manage.get_body().get('workspace_id') reranker_model = get_model_instance_by_model_workspace_id(reranker_model_id, - workspace_id, - top_n=top_n) + workspace_id, + top_n=top_n) result = reranker_model.compress_documents( documents, question) @@ -93,6 +92,7 @@ class BaseRerankerNode(IRerankerNode): def get_details(self, index: int, **kwargs): return { + 'show_knowledge': self.context.get('show_knowledge'), 'name': self.node.properties.get('stepName'), "index": index, 'document_list': self.context.get('document_list'), diff --git a/apps/application/flow/step_node/search_knowledge_node/i_search_knowledge_node.py b/apps/application/flow/step_node/search_knowledge_node/i_search_knowledge_node.py index 4f55f3c4b..9f07f327a 100644 --- a/apps/application/flow/step_node/search_knowledge_node/i_search_knowledge_node.py +++ b/apps/application/flow/step_node/search_knowledge_node/i_search_knowledge_node.py @@ -41,6 +41,9 @@ class SearchDatasetStepNodeSerializer(serializers.Serializer): question_reference_address = serializers.ListField(required=True) + show_knowledge = serializers.BooleanField(required=True, + label=_("The results are displayed in the knowledge sources")) + def is_valid(self, *, raise_exception=False): super().is_valid(raise_exception=True) @@ -73,7 +76,7 @@ class ISearchKnowledgeStepNode(INode): return self.execute(**self.node_params_serializer.data, question=str(question), exclude_paragraph_id_list=exclude_paragraph_id_list) - def execute(self, dataset_id_list, dataset_setting, question, + def execute(self, dataset_id_list, dataset_setting, question, show_knowledge, exclude_paragraph_id_list=None, **kwargs) -> NodeResult: pass diff --git a/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py b/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py index 22abe7521..eb9130725 100644 --- a/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py +++ b/apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py @@ -62,10 +62,11 @@ class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode): result])[0:dataset_setting.get('max_paragraph_char_number', 5000)] self.context['directly_return'] = directly_return - def execute(self, knowledge_id_list, knowledge_setting, question, + def execute(self, knowledge_id_list, knowledge_setting, question, show_knowledge, exclude_paragraph_id_list=None, **kwargs) -> NodeResult: self.context['question'] = question + self.context['show_knowledge'] = show_knowledge get_knowledge_list_of_authorized = DatabaseModelManage.get_model('get_knowledge_list_of_authorized') chat_user_type = self.workflow_manage.get_body().get('chat_user_type') if get_knowledge_list_of_authorized is not None and RoleConstants.CHAT_USER.value.name == chat_user_type: @@ -145,6 +146,7 @@ class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode): def get_details(self, index: int, **kwargs): return { 'name': self.node.properties.get('stepName'), + 'show_knowledge': self.context.get('show_knowledge'), 'question': self.context.get('question'), "index": index, 'run_time': self.context.get('run_time'), diff --git a/apps/application/serializers/application_chat_record.py b/apps/application/serializers/application_chat_record.py index 324aaa934..32e2f2ad3 100644 --- a/apps/application/serializers/application_chat_record.py +++ b/apps/application/serializers/application_chat_record.py @@ -75,7 +75,15 @@ class ChatRecordOperateSerializer(serializers.Serializer): chat_record = self.get_chat_record() if chat_record is None: raise AppApiException(500, gettext("Conversation does not exist")) - return ApplicationChatRecordQuerySerializers.reset_chat_record(chat_record) + application_access_token = QuerySet(ApplicationAccessToken).filter( + application_id=self.data.get('application_id')).first() + show_source = False + show_exec = False + if application_access_token is not None: + show_exec = application_access_token.show_exec + show_source = application_access_token.show_source + return ApplicationChatRecordQuerySerializers.reset_chat_record( + chat_record, show_source, show_exec) class ApplicationChatRecordQuerySerializers(serializers.Serializer): @@ -103,21 +111,34 @@ class ApplicationChatRecordQuerySerializers(serializers.Serializer): QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by)] @staticmethod - def reset_chat_record(chat_record): + def reset_chat_record(chat_record, show_source, show_exec): knowledge_list = [] paragraph_list = [] - if 'search_step' in chat_record.details and chat_record.details.get('search_step').get( 'paragraph_list') is not None: paragraph_list = chat_record.details.get('search_step').get( 'paragraph_list') - knowledge_list = [{'id': dataset_id, 'name': name} for dataset_id, name in reduce(lambda x, y: {**x, **y}, - [{row.get( - 'knowledge_id'): row.get( - "knowledge_name")} for - row in - paragraph_list], - {}).items()] + + for item in chat_record.details.values(): + if item.get('type') == 'search-knowledge-node' and item.get('show_knowledge', False): + paragraph_list = paragraph_list + item.get( + 'paragraph_list') + + if item.get('type') == 'reranker-node' and item.get('show_knowledge', False): + paragraph_list = paragraph_list + [rl.get('metadata') for rl in item.get('result_list') if + 'document_id' in rl.get('metadata') and 'knowledge_id' in rl.get( + 'metadata')] + paragraph_list = list({p.get('id'): p for p in paragraph_list}.values()) + knowledge_list = knowledge_list + [{'id': knowledge_id, **knowledge} for knowledge_id, knowledge in + reduce(lambda x, y: {**x, **y}, + [{row.get( + 'knowledge_id'): {'knowledge_name': row.get( + "knowledge_name"), + 'knowledge_type': row.get('knowledge_type')}} for + row in + paragraph_list], + {}).items()] + if len(chat_record.improve_paragraph_id_list) > 0: paragraph_model_list = QuerySet(Paragraph).filter(id__in=chat_record.improve_paragraph_id_list) if len(paragraph_model_list) < len(chat_record.improve_paragraph_id_list): @@ -126,14 +147,15 @@ class ApplicationChatRecordQuerySerializers(serializers.Serializer): filter(lambda p_id: paragraph_model_id_list.__contains__(p_id), chat_record.improve_paragraph_id_list)) chat_record.save() - + show_source_dict = {'knowledge_list': knowledge_list, + 'paragraph_list': paragraph_list, } + show_exec_dict = {'execution_details': [chat_record.details[key] for key in chat_record.details]} return { **ChatRecordSerializerModel(chat_record).data, 'padding_problem_text': chat_record.details.get('problem_padding').get( 'padding_problem_text') if 'problem_padding' in chat_record.details else None, - 'knowledge_list': knowledge_list, - 'paragraph_list': paragraph_list, - 'execution_details': [chat_record.details[key] for key in chat_record.details] + **(show_source_dict if show_source else {}), + **(show_exec_dict if show_exec else {}) } def page(self, current_page: int, page_size: int, with_valid=True): @@ -141,9 +163,17 @@ class ApplicationChatRecordQuerySerializers(serializers.Serializer): self.is_valid(raise_exception=True) order_by = '-create_time' if self.data.get('order_asc') is None or self.data.get( 'order_asc') else 'create_time' + application_access_token = QuerySet(ApplicationAccessToken).filter( + application_id=self.data.get('application_id')).first() + show_source = False + show_exec = False + if application_access_token is not None: + show_exec = application_access_token.show_exec + show_source = application_access_token.show_source page = page_search(current_page, page_size, QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by), - post_records_handler=lambda chat_record: self.reset_chat_record(chat_record)) + post_records_handler=lambda chat_record: self.reset_chat_record(chat_record, show_source, + show_exec)) return page diff --git a/apps/application/serializers/common.py b/apps/application/serializers/common.py index 1574b0d4b..30d974294 100644 --- a/apps/application/serializers/common.py +++ b/apps/application/serializers/common.py @@ -14,10 +14,12 @@ from django.db.models import QuerySet from django.utils.translation import gettext_lazy as _ from application.chat_pipeline.step.chat_step.i_chat_step import PostResponseHandler -from application.models import Application, ChatRecord, Chat, ApplicationVersion, ChatUserType +from application.models import Application, ChatRecord, Chat, ApplicationVersion, ChatUserType, ApplicationTypeChoices, \ + ApplicationKnowledgeMapping from common.constants.cache_version import Cache_Version from common.database_model_manage.database_model_manage import DatabaseModelManage from common.exception.app_exception import ChatException +from knowledge.models import Document from models_provider.models import Model from models_provider.tools import get_model_credential @@ -72,6 +74,19 @@ class ChatInfo: '-create_time')[0:1].first() if not application: raise ChatException(500, _("The application has not been published. Please use it after publishing.")) + if application.type == ApplicationTypeChoices.SIMPLE.value: + # 数据集id列表 + knowledge_id_list = [str(row.knowledge_id) for row in + QuerySet(ApplicationKnowledgeMapping).filter( + application_id=self.application_id)] + + # 需要排除的文档 + exclude_document_id_list = [str(document.id) for document in + QuerySet(Document).filter( + knowledge_id__in=knowledge_id_list, + is_active=False)] + self.knowledge_id_list = knowledge_id_list + self.exclude_document_id_list = exclude_document_id_list self.application = application return application diff --git a/apps/application/sql/list_knowledge_paragraph_by_paragraph_id.sql b/apps/application/sql/list_knowledge_paragraph_by_paragraph_id.sql index f31975e8a..70e034771 100644 --- a/apps/application/sql/list_knowledge_paragraph_by_paragraph_id.sql +++ b/apps/application/sql/list_knowledge_paragraph_by_paragraph_id.sql @@ -1,6 +1,7 @@ SELECT paragraph.*, knowledge."name" AS "knowledge_name", + knowledge."type" AS "knowledge_type", "document"."name" AS "document_name", "document"."meta" AS "meta", "document"."hit_handling_method" AS "hit_handling_method", diff --git a/apps/chat/serializers/chat_record.py b/apps/chat/serializers/chat_record.py index 4239e5b20..1e0183ec0 100644 --- a/apps/chat/serializers/chat_record.py +++ b/apps/chat/serializers/chat_record.py @@ -15,7 +15,8 @@ from rest_framework import serializers from application.models import VoteChoices, ChatRecord, Chat from application.serializers.application_chat import ChatCountSerializer -from application.serializers.application_chat_record import ChatRecordSerializerModel +from application.serializers.application_chat_record import ChatRecordSerializerModel, \ + ApplicationChatRecordQuerySerializers from common.db.search import page_search from common.exception.app_exception import AppApiException from common.utils.lock import try_lock, un_lock @@ -86,7 +87,8 @@ class HistoricalConversationSerializer(serializers.Serializer): def get_queryset(self): chat_user_id = self.data.get('chat_user_id') application_id = self.data.get("application_id") - return QuerySet(Chat).filter(application_id=application_id, chat_user_id=chat_user_id, is_deleted=False) + return QuerySet(Chat).filter(application_id=application_id, chat_user_id=chat_user_id, + is_deleted=False).order_by('-update_time') def list(self): self.is_valid(raise_exception=True) @@ -157,4 +159,6 @@ class HistoricalConversationRecordSerializer(serializers.Serializer): def page(self, current_page, page_size): self.is_valid(raise_exception=True) - return page_search(current_page, page_size, self.get_queryset(), lambda r: ChatRecordSerializerModel(r).data) + return ApplicationChatRecordQuerySerializers( + data={'application_id': self.data.get('application_id'), 'chat_id': self.data.get('chat_id')}).page( + current_page, page_size) diff --git a/ui/src/locales/lang/zh-CN/views/application-workflow.ts b/ui/src/locales/lang/zh-CN/views/application-workflow.ts index b4edecc74..2ed2a6912 100644 --- a/ui/src/locales/lang/zh-CN/views/application-workflow.ts +++ b/ui/src/locales/lang/zh-CN/views/application-workflow.ts @@ -133,6 +133,10 @@ export default { result: '检索结果', directly_return: '满足直接回答的分段内容', searchParam: '检索参数', + showKnowledge: { + label: '结果显示在知识来源中', + requiredMessage: '请设置参数', + }, searchQuestion: { label: '检索问题', placeholder: '请选择检索问题', diff --git a/ui/src/workflow/nodes/reranker-node/index.vue b/ui/src/workflow/nodes/reranker-node/index.vue index c76e93e51..93d452cf4 100644 --- a/ui/src/workflow/nodes/reranker-node/index.vue +++ b/ui/src/workflow/nodes/reranker-node/index.vue @@ -166,6 +166,14 @@ :model-type="'RERANKER'" > + + + @@ -198,6 +206,7 @@ const form = { similarity: 0, max_paragraph_char_number: 5000, }, + show_knowledge: false, } const modelOptions = ref(null) diff --git a/ui/src/workflow/nodes/search-knowledge-node/index.vue b/ui/src/workflow/nodes/search-knowledge-node/index.vue index 0e9883ce9..d59ae6fdd 100644 --- a/ui/src/workflow/nodes/search-knowledge-node/index.vue +++ b/ui/src/workflow/nodes/search-knowledge-node/index.vue @@ -106,6 +106,20 @@ v-model="form_data.question_reference_address" /> + + + @@ -142,6 +156,7 @@ const form = { search_mode: 'embedding', }, question_reference_address: [], + show_knowledge: false, } const form_data = computed({