MaxKB/apps/application/flow/i_step_node.py
shaohuzhang1 bfae088df6
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
Typos Check / Spell Check with Typos (push) Waiting to run
feat: knowledge workflow (#4399)
* feat: init knowledge workflow

* feat: add knowledge workflow and version models, serializers, and API views

* feat: knowledge workflow

* feat: knowledge workflow

* feat: add KnowledgeWorkflowModelSerializer and Operate class for workflow management

* fix: route

* feat: knowledge workflow

* feat: Knowledge workflow permission

* feat: knowledge workflow

* feat: knowledge workflow

* feat: knowledge workflow

* feat: knowledge workflow

* feat: Data source web node

* fix: Back route

* feat: knowledge workflow

* feat: knowledge workflow

* feat: Knowledge write node

* feat: add Data Source tool functionality and localization

* feat: add Data Source tool functionality and localization

* feat: knowledge workflow

* feat: knowledge workflow

* fix: simplify export tool permission check in ToolListContainer.vue

* fix: simplify export condition in ToolResourceIndex.vue

* fix: simplify condition for copying tool in ToolListContainer

* feat: knowledge workflow

* fix: Upload local files and add output fields

* feat: Knowledge write

* feat: add Document Split Node functionality and localization

* feat: add Document Split Node functionality and localization

* feat: Knowledge write

* feat: enhance Document Split Node with result processing and problem list generation

* fix: Allow problem be blank

* feat: enhance Document Split Node with result processing and problem list generation

* feat: tool datasource

* fix: Optimization of knowledge base workflow execution logic

* refactor: streamline image handling by updating application and knowledge ID management

* refactor: streamline image handling by updating application and knowledge ID management

* feat: extend support modes in variable aggregation node to include knowledge workflows

* feat: Chunks stored

* refactor: simplify file handling in document extraction by removing unnecessary byte conversion and enhancing file saving logic

* refactor: update file ID assignment in document extraction to use provided metadata

* feat: Workflow menu that distinguishes between applications and knowledge bases

* refactor: update file ID assignment in document extraction to use provided metadata

* fix: Add workspace ID as workflow execution parameter

* feat: add code template for Data Source tool form functionality

* refactor: remove unused sys import and improve module handling

* feat: Execution details support loading status

* refactor: update tool type handling and improve category merging logic

* feat: Alter fork depth

* fix: ensure filterList is properly initialized and updated in getList function

* refactor: simplify ToolStoreDialog by removing unused toolType logic

* perf: Optimize the style

* style: adjust div width for improved layout in Tree component

* refactor: improve polling mechanism for knowledge workflow action

* fix: Get workspace_id from workflow params

* fix: filter out 'file_bytes' from result in get_details method

* feat: add recursive filtering for file_bytes in context data

* fix: append results to paragraph_list instead of replacing it

* perf: Optimize translation files

* fix: include document name in bytes_to_uploaded_file call for better file handling

* refactor: optimize buffer retrieval in document processing

* refactor: remove redundant parameter from bytes_to_uploaded_file call

* fix: Page style optimization

* feat: add slider for setting limit in document rules form

* feat: add workflow knowledge management endpoints and related functionality

* fix: swap file size and file count limits in form inputs

* refactor: update tool_config args to use list format for improved readability

* feat: Node supports knowledge base workflow

* feat: Node supports knowledge base workflow

* fix: Basic node data cannot be obtained in the workflow

* style: Knowledge base workflow debugging page style adjustment

* fix: Loop nodes cannot be used in the knowledge base workflow

* fix: Knowledge base workflow variable assignment node

* feat: add chunk size slider to form for custom split strategy

* fix: Workflow style optimization

---------

Co-authored-by: CaptainB <bin@fit2cloud.com>
Co-authored-by: zhangzhanwei <zhanwei.zhang@fit2cloud.com>
Co-authored-by: wangdan-fit2cloud <dan.wang@fit2cloud.com>
2025-11-28 15:38:20 +08:00

283 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
"""
@project: maxkb
@Author
@file i_step_node.py
@date2024/6/3 14:57
@desc:
"""
import time
import uuid
from abc import abstractmethod
from hashlib import sha1
from typing import Type, Dict, List
from django.core import cache
from django.db.models import QuerySet
from rest_framework import serializers
from rest_framework.exceptions import ValidationError, ErrorDetail
from application.flow.common import Answer, NodeChunk
from application.models import ApplicationChatUserStats
from application.models import ChatRecord, ChatUserType
from common.field.common import InstanceField
from knowledge.models.knowledge_action import KnowledgeAction, State
chat_cache = cache
def write_context(step_variable: Dict, global_variable: Dict, node, workflow):
if step_variable is not None:
for key in step_variable:
node.context[key] = step_variable[key]
if workflow.is_result(node, NodeResult(step_variable, global_variable)) and 'answer' in step_variable:
answer = step_variable['answer']
yield answer
node.answer_text = answer
if global_variable is not None:
for key in global_variable:
workflow.context[key] = global_variable[key]
node.context['run_time'] = time.time() - node.context['start_time']
def is_interrupt(node, step_variable: Dict, global_variable: Dict):
return node.type == 'form-node' and not node.context.get('is_submit', False)
class WorkFlowPostHandler:
def __init__(self, chat_info):
self.chat_info = chat_info
def handler(self, workflow):
workflow_body = workflow.get_body()
question = workflow_body.get('question')
chat_record_id = workflow_body.get('chat_record_id')
chat_id = workflow_body.get('chat_id')
details = workflow.get_runtime_details()
message_tokens = sum([row.get('message_tokens') for row in details.values() if
'message_tokens' in row and row.get('message_tokens') is not None])
answer_tokens = sum([row.get('answer_tokens') for row in details.values() if
'answer_tokens' in row and row.get('answer_tokens') is not None])
answer_text_list = workflow.get_answer_text_list()
answer_text = '\n\n'.join(
'\n\n'.join([a.get('content') for a in answer]) for answer in
answer_text_list)
if workflow.chat_record is not None:
chat_record = workflow.chat_record
chat_record.answer_text = answer_text
chat_record.details = details
chat_record.message_tokens = message_tokens
chat_record.answer_tokens = answer_tokens
chat_record.answer_text_list = answer_text_list
chat_record.run_time = time.time() - workflow.context['start_time']
else:
chat_record = ChatRecord(id=chat_record_id,
chat_id=chat_id,
problem_text=question,
answer_text=answer_text,
details=details,
message_tokens=message_tokens,
answer_tokens=answer_tokens,
answer_text_list=answer_text_list,
run_time=time.time() - workflow.context.get('start_time') if workflow.context.get(
'start_time') is not None else 0,
index=0)
self.chat_info.append_chat_record(chat_record)
self.chat_info.set_cache()
if not self.chat_info.debug and [ChatUserType.ANONYMOUS_USER.value, ChatUserType.CHAT_USER.value].__contains__(
workflow_body.get('chat_user_type')):
application_public_access_client = (QuerySet(ApplicationChatUserStats)
.filter(chat_user_id=workflow_body.get('chat_user_id'),
chat_user_type=workflow_body.get('chat_user_type'),
application_id=self.chat_info.application_id).first())
if application_public_access_client is not None:
application_public_access_client.access_num = application_public_access_client.access_num + 1
application_public_access_client.intraday_access_num = application_public_access_client.intraday_access_num + 1
application_public_access_client.save()
self.chat_info = None
class KnowledgeWorkflowPostHandler(WorkFlowPostHandler):
def __init__(self, chat_info, knowledge_action_id):
super().__init__(chat_info)
self.knowledge_action_id = knowledge_action_id
def handler(self, workflow):
QuerySet(KnowledgeAction).filter(id=self.knowledge_action_id).update(
state=State.SUCCESS)
class NodeResult:
def __init__(self, node_variable: Dict, workflow_variable: Dict,
_write_context=write_context, _is_interrupt=is_interrupt):
self._write_context = _write_context
self.node_variable = node_variable
self.workflow_variable = workflow_variable
self._is_interrupt = _is_interrupt
def write_context(self, node, workflow):
return self._write_context(self.node_variable, self.workflow_variable, node, workflow)
def is_assertion_result(self):
return 'branch_id' in self.node_variable
def is_interrupt_exec(self, current_node):
"""
是否中断执行
@param current_node:
@return:
"""
return self._is_interrupt(current_node, self.node_variable, self.workflow_variable)
class ReferenceAddressSerializer(serializers.Serializer):
node_id = serializers.CharField(required=True, label="节点id")
fields = serializers.ListField(
child=serializers.CharField(required=True, label="节点字段"), required=True,
label="节点字段数组")
class FlowParamsSerializer(serializers.Serializer):
# 历史对答
history_chat_record = serializers.ListField(child=InstanceField(model_type=ChatRecord, required=True),
label="历史对答")
question = serializers.CharField(required=True, label="用户问题")
chat_id = serializers.CharField(required=True, label="对话id")
chat_record_id = serializers.CharField(required=True, label="对话记录id")
stream = serializers.BooleanField(required=True, label="流式输出")
chat_user_id = serializers.CharField(required=False, label="对话用户id")
chat_user_type = serializers.CharField(required=False, label="对话用户类型")
workspace_id = serializers.CharField(required=True, label="工作空间id")
application_id = serializers.CharField(required=True, label="应用id")
re_chat = serializers.BooleanField(required=True, label="换个答案")
debug = serializers.BooleanField(required=True, label="是否debug")
class KnowledgeFlowParamsSerializer(serializers.Serializer):
knowledge_id = serializers.UUIDField(required=True, label="知识库id")
workspace_id = serializers.CharField(required=True, label="工作空间id")
knowledge_action_id = serializers.UUIDField(required=True, label="知识库任务执行器id")
data_source = serializers.DictField(required=True, label="数据源")
knowledge_base = serializers.DictField(required=False, label="知识库设置")
class INode:
view_type = 'many_view'
@abstractmethod
def save_context(self, details, workflow_manage):
pass
def get_answer_list(self) -> List[Answer] | None:
if self.answer_text is None:
return None
reasoning_content_enable = self.context.get('model_setting', {}).get('reasoning_content_enable', False)
return [
Answer(self.answer_text, self.view_type, self.runtime_node_id, self.workflow_params.get('chat_record_id'),
{},
self.runtime_node_id, self.context.get('reasoning_content', '') if reasoning_content_enable else '')]
def __init__(self, node, workflow_params, workflow_manage, up_node_id_list=None,
get_node_params=lambda node: node.properties.get('node_data'), salt=None):
# 当前步骤上下文,用于存储当前步骤信息
self.status = 200
self.err_message = ''
self.node = node
self.node_params = get_node_params(node)
self.workflow_params = workflow_params
self.workflow_manage = workflow_manage
self.node_params_serializer = None
self.flow_params_serializer = None
self.context = {}
self.answer_text = None
self.id = node.id
if up_node_id_list is None:
up_node_id_list = []
self.up_node_id_list = up_node_id_list
self.node_chunk = NodeChunk()
self.runtime_node_id = sha1(uuid.NAMESPACE_DNS.bytes + bytes(str(uuid.uuid5(uuid.NAMESPACE_DNS,
"".join([*sorted(up_node_id_list),
node.id]))),
"utf-8")).hexdigest() + (
"__" + str(salt) if salt is not None else '')
def valid_args(self, node_params, flow_params):
flow_params_serializer_class = self.get_flow_params_serializer_class()
node_params_serializer_class = self.get_node_params_serializer_class()
if flow_params_serializer_class is not None and flow_params is not None:
self.flow_params_serializer = flow_params_serializer_class(data=flow_params)
self.flow_params_serializer.is_valid(raise_exception=True)
if node_params_serializer_class is not None:
self.node_params_serializer = node_params_serializer_class(data=node_params)
self.node_params_serializer.is_valid(raise_exception=True)
if self.node.properties.get('status', 200) != 200:
raise ValidationError(ErrorDetail(f'节点{self.node.properties.get("stepName")} 不可用'))
def get_reference_field(self, fields: List[str]):
return self.get_field(self.context, fields)
@staticmethod
def get_field(obj, fields: List[str]):
for field in fields:
value = obj.get(field)
if value is None:
return None
else:
obj = value
return obj
@abstractmethod
def get_node_params_serializer_class(self) -> Type[serializers.Serializer]:
pass
def get_flow_params_serializer_class(self) -> Type[serializers.Serializer]:
return self.workflow_manage.get_params_serializer_class()
def get_write_error_context(self, e):
self.status = 500
self.answer_text = str(e)
self.err_message = str(e)
current_time = time.time()
self.context['run_time'] = current_time - (self.context.get('start_time') or current_time)
def write_error_context(answer, status=200):
pass
return write_error_context
def run(self) -> NodeResult:
"""
:return: 执行结果
"""
start_time = time.time()
self.context['start_time'] = start_time
result = self._run()
self.context['run_time'] = time.time() - start_time
return result
def _run(self):
result = self.execute()
return result
def execute(self, **kwargs) -> NodeResult:
pass
def get_details(self, index: int, **kwargs):
"""
运行详情
:return: 步骤详情
"""
return {}