From fbe361929c23cb3a8e8e91fc442cbb121ca4aafa Mon Sep 17 00:00:00 2001
From: archer <545436317@qq.com>
Date: Mon, 10 Nov 2025 17:32:12 +0800
Subject: [PATCH] agent: extract LLM context compression module and task
 complexity check

Move the tool-response and conversation-history compression logic out of
agentCall.ts into a dedicated core/ai/llm/compress module, compress the
request messages before each LLM call, and extract the task complexity
check from dispatchRunAgent into master/taskComplexity.ts.

---
 packages/global/core/app/constants.ts         |   2 +-
 packages/service/core/ai/llm/agentCall.ts     | 493 ++----------------
 .../constants.ts}                             |   0
 .../service/core/ai/llm/compress/index.ts     | 297 +++++++++++
 .../service/core/ai/llm/compress/prompt.ts    | 169 ++++++
 .../workflow/dispatch/ai/agent/constants.ts   | 120 ++---
 .../core/workflow/dispatch/ai/agent/index.ts  | 363 +++++--------
 .../ai/agent/master/taskComplexity.ts         |  84 +++
 .../dispatch/ai/agent/sub/plan/index.ts       |  14 +-
 .../dispatch/ai/agent/sub/plan/prompt.ts      |  16 +
 .../common/Textarea/PromptEditor/utils.ts     |  16 +-
 11 files changed, 813 insertions(+), 761 deletions(-)
 rename packages/service/core/ai/llm/{compressionConstants.ts => compress/constants.ts} (100%)
 create mode 100644 packages/service/core/ai/llm/compress/index.ts
 create mode 100644 packages/service/core/ai/llm/compress/prompt.ts
 create mode 100644 packages/service/core/workflow/dispatch/ai/agent/master/taskComplexity.ts

diff --git a/packages/global/core/app/constants.ts b/packages/global/core/app/constants.ts
index 5129049eb..629ba16fc 100644
--- a/packages/global/core/app/constants.ts
+++ b/packages/global/core/app/constants.ts
@@ -61,9 +61,9 @@ export const defaultChatInputGuideConfig = {
 };
 
 export const defaultAppSelectFileConfig: AppFileSelectConfigType = {
+  maxFiles: 10,
   canSelectFile: false,
   canSelectImg: false,
-  maxFiles: 10,
   canSelectVideo: false,
   canSelectAudio: false,
   canSelectCustomFileExtension: false,
diff --git a/packages/service/core/ai/llm/agentCall.ts b/packages/service/core/ai/llm/agentCall.ts
index 56a1d0a60..a20d517a0 100644
--- a/packages/service/core/ai/llm/agentCall.ts
+++ b/packages/service/core/ai/llm/agentCall.ts
@@ -17,12 +17,13 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type
 import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
 import { addLog } from '../../../common/system/log';
 import type { AgentPlanStepType } from '../../workflow/dispatch/ai/agent/sub/plan/type';
-import { calculateCompressionThresholds } from './compressionConstants';
+import { calculateCompressionThresholds } from './compress/constants';
+import { compressRequestMessages, compressToolcallResponse } from './compress';
 
 type RunAgentCallProps = {
   maxRunAgentTimes: number;
   interactiveEntryToolParams?: WorkflowInteractiveResponseType['toolParams'];
-  currentStep?: AgentPlanStepType;
+  currentStep: AgentPlanStepType;
 
   body: {
     messages: ChatCompletionMessageParam[];
@@ -61,440 +62,6 @@ type RunAgentResponse = {
   subAppUsages: ChatNodeUsageType[];
 };
 
-/**
- * Compress a single oversized tool response
- * Integrates character reduction + chunk compression logic
- */
-const compressSingleToolResponse = async (
-  response: string,
-  model: LLMModelItemType,
-  toolName: string,
-  currentDescription: string,
-  maxTargetTokens: number = 4000
-): Promise<string> => {
-  const originalTokens = await countPromptTokens(response);
-
-  console.log(
-    `Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens`
-  );
-  console.log('Response content preview:\n', response.slice(0, 1000));
-
-  // ============ Phase 1: Smart character reduction ============
-  let reduced = response;
-
-  // delete URL
-  reduced = reduced.replace(/https?:\/\/[^\s]+/g, '');
-
-  // delete base64 code
-  reduced = 
reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, ''); - reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, ''); - - // delete HTML/XML tag - reduced = reduced.replace(/<[^>]+>/g, ''); - - // delete Markdown images - reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, ''); - - reduced = reduced.replace( - /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu, - '' - ); - - // Compress whitespace - reduced = reduced.replace(/\n{3,}/g, '\n\n'); - reduced = reduced.replace(/ {2,}/g, ' '); - reduced = reduced.replace(/\t+/g, ' '); - - // Remove duplicate separators - reduced = reduced.replace(/[-=_*#]{5,}/g, '---'); - - // Deduplicate consecutive identical lines - const allLines = reduced.split('\n'); - const deduplicatedLines: string[] = []; - let lastLine = ''; - for (const line of allLines) { - const trimmed = line.trim(); - if (trimmed !== lastLine || trimmed === '') { - deduplicatedLines.push(line); - lastLine = trimmed; - } - } - reduced = deduplicatedLines.join('\n').trim(); - - let currentTokens = await countPromptTokens(reduced); - addLog.info(`After character reduction`, { - tool: toolName, - before: originalTokens, - after: currentTokens, - saved: originalTokens - currentTokens - }); - console.log('After character reduction - content preview:\n', reduced.slice(0, 1000)); - // 2. If reduction meets the requirement, return directly - if (currentTokens <= maxTargetTokens) { - return reduced; - } - - // ============ Phase 2: Chunk compression ============ - const thresholds = calculateCompressionThresholds(model.maxContext); - const chunkMaxTokens = thresholds.chunkSize; - - if (currentTokens <= chunkMaxTokens) { - const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。 - 任务: ${currentDescription} - 工具: ${toolName} - 要求: - - 保留关键数据、结论、错误信息 - - 删除冗余描述、重复内容 - - 格式简洁 - 直接输出压缩文本。 - ${reduced}`; - - try { - const { answerText } = await createLLMResponse({ - body: { - model, - messages: [ - { role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt }, - { - role: ChatCompletionRequestMessageRoleEnum.User, - content: '请按照目标的 token 数量进行压缩' - } - ], - temperature: 0.1, - stream: false - } - }); - - if (answerText) { - reduced = answerText; - currentTokens = await countPromptTokens(reduced); - } - } catch (error) { - addLog.error(`LLM 压缩失败: ${toolName}`, error); - } - - addLog.info(`压缩完成`, { - tool: toolName, - final: currentTokens, - ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%` - }); - console.log('LLM 压缩后-内容预览:\n', reduced); - return reduced; - } - - const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens); - const chunkSize = Math.ceil(reduced.length / targetChunkCount); - const chunks: string[] = []; - - for (let i = 0; i < targetChunkCount; i++) { - const start = i * chunkSize; - const end = Math.min(start + chunkSize, reduced.length); - chunks.push(reduced.substring(start, end)); - } - - addLog.info(`分块压缩信息:`, { - currentTokens: currentTokens, - tool: toolName, - chunkslength: chunks.length, - chunks: chunks - }); - - const targetPerChunk = Math.floor(maxTargetTokens / chunks.length); - - const compressPromises = chunks.map(async (chunk, idx) => { - const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。 - - 任务: ${currentDescription} - 处理: ${toolName}-块${idx + 1}/${chunks.length} - - 要求: - - 保留关键数据、结论、错误 - - 删除冗余、重复内容 - - 格式简洁 - - 直接输出压缩文本。 - - ${chunk}`; - - try { - const { answerText } = await createLLMResponse({ - body: { - model, - messages: [ - { role: 
ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt }, - { - role: ChatCompletionRequestMessageRoleEnum.User, - content: '请按照目标的 token 数量进行压缩' - } - ], - temperature: 0.1, - stream: false - } - }); - - return answerText || chunk; - } catch (error) { - addLog.error(`块${idx + 1}压缩失败`, error); - return chunk; - } - }); - - const compressedChunks = await Promise.all(compressPromises); - reduced = compressedChunks.join('\n\n'); - - currentTokens = await countPromptTokens(reduced); - addLog.info(`分块压缩完成`, { - tool: toolName, - step1: originalTokens, - final: currentTokens, - ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`, - reduced: reduced - }); - - return reduced; -}; - -/** - * 压缩 Agent 对话历史 - * 当 messages 的 token 长度超过阈值时,调用 LLM 进行压缩 - */ -const compressAgentMessages = async ( - messages: ChatCompletionMessageParam[], - model: LLMModelItemType, - currentDescription: string -): Promise => { - if (!messages || messages.length === 0) return messages; - - const tokenCount = await countGptMessagesTokens(messages); - const thresholds = calculateCompressionThresholds(model.maxContext); - const maxTokenThreshold = thresholds.agentMessages.threshold; - - addLog.debug('Agent messages token check', { - tokenCount, - maxTokenThreshold, - needCompress: tokenCount > maxTokenThreshold - }); - - const messagesJson = JSON.stringify(messages, null, 2); - - if (tokenCount <= maxTokenThreshold) { - console.log('messages 无需压缩,共', messages.length, '条消息'); - return messages; - } - - const targetTokens = Math.round(tokenCount * thresholds.agentMessages.targetRatio); - - addLog.info('Start compressing agent messages', { - originalTokens: tokenCount, - targetTokens, - compressionRatio: thresholds.agentMessages.targetRatio - }); - - const systemPrompt = `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。 - - ## 当前任务目标 - ${currentDescription} - - ## 压缩目标(最高优先级) - - **原始 token 数**: ${tokenCount} tokens - - **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%) - - **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens - - --- - - ## 三阶段压缩工作流 - - ### 【第一阶段:扫描与标注】(内部思考,不输出) - - 在开始压缩前,请先在内心完成以下分析: - - 1. **构建 ID 映射表** - - 扫描所有 assistant 消息中的 tool_calls,提取每个 tool_call 的 id - - 找到对应的 tool 消息的 tool_call_id - - 建立一一对应的映射关系表,例如: - \`\`\` - call_abc123 → tool 消息 #5 - call_def456 → tool 消息 #7 - \`\`\` - - 2. **评估消息相关性** - 根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级: - - **[高]**: 直接支撑任务目标,包含关键数据/结论 - - **[中]**: 间接相关,提供背景信息 - - **[低]**: 弱相关或无关,可大幅精简或删除 - - 3. **确定压缩策略** - - **system 消息**:保持完整,不做修改 - - 高相关消息:保留 70-90% 内容(精简冗余表达) - - 中等相关消息:保留 30-50% 内容(提炼核心要点) - - 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结) - - --- - - ### 【第二阶段:执行压缩】 - - 基于第一阶段的分析,执行压缩操作: - - **压缩原则**: - 1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改 - 2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段 - 3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现 - 4. **大幅精简 content**: - - tool 消息的 content:删除冗长描述、重复信息,只保留核心结论和关键数据 - - 合并相似的工具结果(但保留各自的 tool_call_id) - 5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除 - - **压缩技巧**: - - 删除:详细过程描述、重复信息、失败尝试、调试日志 - - 保留:具体数据、关键结论、错误信息、链接引用 - - 精简:用"核心发现:A、B、C"代替长篇叙述 - - --- - - ### 【第三阶段:自校验】 - - 输出前,必须检查: - - 1. **ID 一致性校验** - - 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息? - - 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到? - - 是否所有 ID 都原样保留,没有修改或生成新 ID? - - 2. **压缩比例校验** - - 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens? - - 如果超出目标,需进一步精简 content 字段 - - 3. 
**格式完整性校验** - - 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段? - - JSON 结构是否正确? - - --- - - ## 输出格式 - - 请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块): - - \`\`\`json - { - "compressed_messages": [ - {"role": "system", "content": "系统指令(精简后)"}, - {"role": "user", "content": "用户请求"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - { - "id": "call_原始ID", - "type": "function", - "function": { - "name": "工具名", - "arguments": "{\\"param\\":\\"精简后的值\\"}" - } - } - ] - }, - { - "role": "tool", - "tool_call_id": "call_原始ID", - "content": "工具返回的核心结果(已大幅精简,只保留关键信息)" - } - ], - "compression_summary": "原始${tokenCount}tokens → 约X tokens (压缩比例Y%)。操作:删除了Z条低相关消息,精简了N个工具响应。ID映射关系已验证正确。" - } - \`\`\` - - --- - - ## 压缩示例 - - **示例 1:工具调用压缩** - - 原始(500+ tokens): - \`\`\`json - [ - {"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]}, - {"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章:\\n1. 标题:Python性能优化完整指南\\n 作者:张三\\n 发布时间:2024-01-15\\n 摘要:本文详细介绍了Python性能优化的各种技巧,包括...(此处省略400字详细内容)\\n URL: https://example.com/article1\\n2. 标题:..."} - ] - \`\`\` - - 压缩后(100 tokens): - \`\`\`json - [ - {"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]}, - {"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现:①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"} - ] - \`\`\` - - **示例 2:相似内容合并** - - 如果有多个相似的搜索结果,可以合并 content,但必须保留各自的 ID 映射。 - - --- - - ## 待压缩的对话历史 - - ${messagesJson} - - --- - - 请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`; - - const userPrompt = '请执行压缩操作,严格按照JSON格式返回结果。'; - - try { - const { answerText } = await createLLMResponse({ - body: { - model, - messages: [ - { - role: ChatCompletionRequestMessageRoleEnum.System, - content: systemPrompt - }, - { - role: ChatCompletionRequestMessageRoleEnum.User, - content: userPrompt - } - ], - temperature: 0.1, - stream: false - } - }); - - if (!answerText) { - addLog.warn('Compression failed: empty response, return original messages'); - return messages; - } - - const jsonMatch = - answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/); - if (!jsonMatch) { - addLog.warn('Compression failed: cannot parse JSON, return original messages'); - return messages; - } - - const jsonText = jsonMatch[1] || jsonMatch[0]; - const parsed = JSON.parse(jsonText); - - if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) { - addLog.warn('Compression failed: invalid format, return original messages'); - return messages; - } - - const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages); - addLog.info('Agent messages compressed successfully', { - originalTokens: tokenCount, - compressedTokens, - actualRatio: (compressedTokens / tokenCount).toFixed(2), - summary: parsed.compression_summary - }); - - return parsed.compressed_messages as ChatCompletionMessageParam[]; - } catch (error) { - addLog.error('Compression failed', error); - return messages; - } -}; - export const runAgentCall = async ({ maxRunAgentTimes, interactiveEntryToolParams, @@ -528,6 +95,12 @@ export const runAgentCall = async ({ // TODO: 费用检测 runTimes++; + // 对请求的 requestMessages 进行压缩 + const taskDescription = currentStep.description || currentStep.title; + if (taskDescription) { + requestMessages = await compressRequestMessages(requestMessages, model, taskDescription); + 
}
+
     // Request LLM
     let {
       reasoningText: reasoningContent,
@@ -565,29 +138,40 @@
     for await (const tool of toolCalls) {
       // TODO: 加入交互节点处理
+
+      // Call tool and compress tool response
       const { response, usages, interactive } = await handleToolResponse({
         call: tool,
         messages: requestMessages.slice(0, requestMessagesLength)
-      });
+      }).then(async (res) => {
+        const thresholds = calculateCompressionThresholds(model.maxContext);
+        const toolTokenCount = await countPromptTokens(res.response);
 
-      let finalResponse = response;
-      const thresholds = calculateCompressionThresholds(model.maxContext);
-      const toolTokenCount = await countPromptTokens(response);
-      if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
-        const taskDescription = currentStep.description || currentStep.title;
-        finalResponse = await compressSingleToolResponse(
-          response,
-          model,
-          tool.function.name,
-          taskDescription,
-          thresholds.singleTool.target
-        );
-      }
+        const response = await (async () => {
+          if (toolTokenCount > thresholds.singleTool.threshold && currentStep) {
+            const taskDescription = currentStep.description || currentStep.title;
+            return await compressToolcallResponse(
+              res.response,
+              model,
+              tool.function.name,
+              taskDescription,
+              thresholds.singleTool.target
+            );
+          } else {
+            return res.response;
+          }
+        })();
+
+        return {
+          ...res,
+          response
+        };
+      });
 
       requestMessages.push({
         tool_call_id: tool.id,
         role: ChatCompletionRequestMessageRoleEnum.Tool,
-        content: finalResponse
+        content: response
      });
 
       subAppUsages.push(...usages);
@@ -597,18 +181,11 @@
     }
   }
 
-  if (toolCalls.length > 0 && currentStep) {
-    const taskDescription = currentStep.description || currentStep.title;
-    if (taskDescription) {
-      requestMessages = await compressAgentMessages(requestMessages, model, taskDescription);
-    }
-  }
   // TODO: 移动到工作流里 assistantResponses concat
   const currentAssistantResponses = GPTMessages2Chats({
     messages: requestMessages.slice(requestMessagesLength),
     getToolInfo
   })[0] as AIChatItemType;
-
   if (currentAssistantResponses) {
     assistantResponses.push(...currentAssistantResponses.value);
   }
diff --git a/packages/service/core/ai/llm/compressionConstants.ts b/packages/service/core/ai/llm/compress/constants.ts
similarity index 100%
rename from packages/service/core/ai/llm/compressionConstants.ts
rename to packages/service/core/ai/llm/compress/constants.ts
diff --git a/packages/service/core/ai/llm/compress/index.ts b/packages/service/core/ai/llm/compress/index.ts
new file mode 100644
index 000000000..24ee1daee
--- /dev/null
+++ b/packages/service/core/ai/llm/compress/index.ts
@@ -0,0 +1,297 @@
+import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
+import { countGptMessagesTokens, countPromptTokens } from '../../../../common/string/tiktoken';
+import { addLog } from '../../../../common/system/log';
+import { calculateCompressionThresholds } from './constants';
+import { createLLMResponse } from '../request';
+import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
+import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
+import { getCompressRequestMessagesPrompt } from './prompt';
+
+/**
+ * Compress a single oversized tool response
+ * Integrates character reduction + chunk compression logic
+ */
+export const compressToolcallResponse = async (
+  response: string,
+  model: LLMModelItemType,
+  toolName: string,
+  currentDescription: string,
+  maxTargetTokens: number = 4000
+): Promise<string> => 
{ + const originalTokens = await countPromptTokens(response); + + console.log( + `Start single tool compression ${toolName}: ${originalTokens} tokens → target ${maxTargetTokens} tokens` + ); + console.log('Response content preview:\n', response.slice(0, 1000)); + + // ============ Phase 1: Smart character reduction ============ + let reduced = response; + + // delete URL + reduced = reduced.replace(/https?:\/\/[^\s]+/g, ''); + + // delete base64 code + reduced = reduced.replace(/data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g, ''); + reduced = reduced.replace(/base64,[A-Za-z0-9+/=]{50,}/g, ''); + + // delete HTML/XML tag + reduced = reduced.replace(/<[^>]+>/g, ''); + + // delete Markdown images + reduced = reduced.replace(/!\[([^\]]*)\]\([^\)]+\)/g, ''); + + reduced = reduced.replace( + /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu, + '' + ); + + // Compress whitespace + reduced = reduced.replace(/\n{3,}/g, '\n\n'); + reduced = reduced.replace(/ {2,}/g, ' '); + reduced = reduced.replace(/\t+/g, ' '); + + // Remove duplicate separators + reduced = reduced.replace(/[-=_*#]{5,}/g, '---'); + + // Deduplicate consecutive identical lines + const allLines = reduced.split('\n'); + const deduplicatedLines: string[] = []; + let lastLine = ''; + for (const line of allLines) { + const trimmed = line.trim(); + if (trimmed !== lastLine || trimmed === '') { + deduplicatedLines.push(line); + lastLine = trimmed; + } + } + reduced = deduplicatedLines.join('\n').trim(); + + let currentTokens = await countPromptTokens(reduced); + addLog.info(`After character reduction`, { + tool: toolName, + before: originalTokens, + after: currentTokens, + saved: originalTokens - currentTokens + }); + console.log('After character reduction - content preview:\n', reduced.slice(0, 1000)); + // 2. 
If reduction meets the requirement, return directly
+  if (currentTokens <= maxTargetTokens) {
+    return reduced;
+  }
+
+  // ============ Phase 2: Small chunk compression ============
+  const thresholds = calculateCompressionThresholds(model.maxContext);
+  const chunkMaxTokens = thresholds.chunkSize;
+
+  if (currentTokens <= chunkMaxTokens) {
+    const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${maxTargetTokens} tokens。
+    任务: ${currentDescription}
+    工具: ${toolName}
+    要求:
+    - 保留关键数据、结论、错误信息
+    - 删除冗余描述、重复内容
+    - 格式简洁
+    直接输出压缩文本。
+    ${reduced}`;
+
+    try {
+      const { answerText } = await createLLMResponse({
+        body: {
+          model,
+          messages: [
+            { role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
+            {
+              role: ChatCompletionRequestMessageRoleEnum.User,
+              content: '请按照目标的 token 数量进行压缩'
+            }
+          ],
+          temperature: 0.1,
+          stream: false
+        }
+      });
+
+      if (answerText) {
+        reduced = answerText;
+        currentTokens = await countPromptTokens(reduced);
+      }
+    } catch (error) {
+      addLog.error(`LLM 压缩失败: ${toolName}`, error);
+    }
+
+    addLog.info(`压缩完成`, {
+      tool: toolName,
+      final: currentTokens,
+      ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`
+    });
+    console.log('LLM 压缩后-内容预览:\n', reduced);
+    return reduced;
+  }
+
+  // ============ Phase 3: Large chunk compression ============
+  const targetChunkCount = Math.ceil(currentTokens / chunkMaxTokens);
+  const chunkSize = Math.ceil(reduced.length / targetChunkCount);
+  const chunks: string[] = [];
+
+  for (let i = 0; i < targetChunkCount; i++) {
+    const start = i * chunkSize;
+    const end = Math.min(start + chunkSize, reduced.length);
+    chunks.push(reduced.substring(start, end));
+  }
+
+  addLog.info(`分块压缩信息:`, {
+    currentTokens: currentTokens,
+    tool: toolName,
+    chunksLength: chunks.length,
+    chunks: chunks
+  });
+
+  const targetPerChunk = Math.floor(maxTargetTokens / chunks.length);
+
+  const compressedChunks = await Promise.all(
+    chunks.map(async (chunk, idx) => {
+      const systemPrompt = `你是内容压缩专家。将以下内容压缩到约 ${targetPerChunk} tokens。
+
+      任务: ${currentDescription}
+      处理: ${toolName}-块${idx + 1}/${chunks.length}
+
+      要求:
+      - 保留关键数据、结论、错误
+      - 删除冗余、重复内容
+      - 格式简洁
+
+      直接输出压缩文本。
+
+      ${chunk}`;
+
+      try {
+        const { answerText } = await createLLMResponse({
+          body: {
+            model,
+            messages: [
+              { role: ChatCompletionRequestMessageRoleEnum.System, content: systemPrompt },
+              {
+                role: ChatCompletionRequestMessageRoleEnum.User,
+                content: '请按照目标的 token 数量进行压缩'
+              }
+            ],
+            temperature: 0.1,
+            stream: false
+          }
+        });
+
+        return answerText || chunk;
+      } catch (error) {
+        addLog.error(`块${idx + 1}压缩失败`, error);
+        return chunk;
+      }
+    })
+  );
+
+  reduced = compressedChunks.join('\n\n');
+
+  currentTokens = await countPromptTokens(reduced);
+  addLog.info(`分块压缩完成`, {
+    tool: toolName,
+    step1: originalTokens,
+    final: currentTokens,
+    ratio: `${((currentTokens / originalTokens) * 100).toFixed(1)}%`,
+    reduced: reduced
+  });
+
+  return reduced;
+};
+
+/**
+ * 压缩 Agent 对话历史
+ * 当 messages 的 token 长度超过阈值时,调用 LLM 进行压缩
+ */
+export const compressRequestMessages = async (
+  messages: ChatCompletionMessageParam[],
+  model: LLMModelItemType,
+  currentDescription: string
+): Promise<ChatCompletionMessageParam[]> => {
+  if (!messages || messages.length === 0) return messages;
+
+  const tokenCount = await countGptMessagesTokens(messages);
+  const thresholds = calculateCompressionThresholds(model.maxContext);
+  const maxTokenThreshold = thresholds.agentMessages.threshold;
+
+  addLog.debug('Agent messages token check', {
+    tokenCount,
+    maxTokenThreshold,
+    needCompress: tokenCount > maxTokenThreshold
+  });
+
+  if 
(tokenCount <= maxTokenThreshold) { + console.log('messages 无需压缩,共', messages.length, '条消息'); + return messages; + } + + addLog.info('Start compressing agent messages', { + originalTokens: tokenCount, + compressionRatio: thresholds.agentMessages.targetRatio + }); + + const { prompt: systemPrompt } = await getCompressRequestMessagesPrompt({ + currentDescription, + messages, + rawTokens: tokenCount, + model + }); + + const userPrompt = '请执行压缩操作,严格按照JSON格式返回结果。'; + + try { + const { answerText } = await createLLMResponse({ + body: { + model, + messages: [ + { + role: ChatCompletionRequestMessageRoleEnum.System, + content: systemPrompt + }, + { + role: ChatCompletionRequestMessageRoleEnum.User, + content: userPrompt + } + ], + temperature: 0.1, + stream: false + } + }); + + if (!answerText) { + addLog.warn('Compression failed: empty response, return original messages'); + return messages; + } + + const jsonMatch = + answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + addLog.warn('Compression failed: cannot parse JSON, return original messages'); + return messages; + } + + const jsonText = jsonMatch[1] || jsonMatch[0]; + const parsed = JSON.parse(jsonText); + + if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) { + addLog.warn('Compression failed: invalid format, return original messages'); + return messages; + } + + const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages); + addLog.info('Agent messages compressed successfully', { + originalTokens: tokenCount, + compressedTokens, + actualRatio: (compressedTokens / tokenCount).toFixed(2), + summary: parsed.compression_summary + }); + + return parsed.compressed_messages as ChatCompletionMessageParam[]; + } catch (error) { + addLog.error('Compression failed', error); + return messages; + } +}; diff --git a/packages/service/core/ai/llm/compress/prompt.ts b/packages/service/core/ai/llm/compress/prompt.ts new file mode 100644 index 000000000..1d1ffb717 --- /dev/null +++ b/packages/service/core/ai/llm/compress/prompt.ts @@ -0,0 +1,169 @@ +import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d'; +import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type'; +import { calculateCompressionThresholds } from './constants'; + +export const getCompressRequestMessagesPrompt = async ({ + currentDescription, + rawTokens, + messages, + model +}: { + currentDescription: string; + messages: ChatCompletionMessageParam[]; + rawTokens: number; + model: LLMModelItemType; +}) => { + const thresholds = calculateCompressionThresholds(model.maxContext); + const targetTokens = Math.round(rawTokens * thresholds.agentMessages.targetRatio); + + return { + prompt: `你是 Agent 对话历史压缩专家。你的任务是将对话历史压缩到目标 token 数,同时确保工具调用的 ID 映射关系完全正确。 + + ## 当前任务目标 + ${currentDescription} + + ## 压缩目标(最高优先级) + - **原始 token 数**: ${rawTokens} tokens + - **目标 token 数**: ${targetTokens} tokens (压缩比例: ${Math.round(thresholds.agentMessages.targetRatio * 100)}%) + - **约束**: 输出的 JSON 内容必须接近 ${targetTokens} tokens + + --- + + ## 三阶段压缩工作流 + + ### 【第一阶段:扫描与标注】(内部思考,不输出) + + 在开始压缩前,请先在内心完成以下分析: + + 1. **构建 ID 映射表** + - 扫描所有 assistant 消息中的 tool_calls,提取每个 tool_call 的 id + - 找到对应的 tool 消息的 tool_call_id + - 建立一一对应的映射关系表,例如: + \`\`\` + call_abc123 → tool 消息 #5 + call_def456 → tool 消息 #7 + \`\`\` + + 2. **评估消息相关性** + 根据当前任务目标「${currentDescription}」,为每条消息标注相关性等级: + - **[高]**: 直接支撑任务目标,包含关键数据/结论 + - **[中]**: 间接相关,提供背景信息 + - **[低]**: 弱相关或无关,可大幅精简或删除 + + 3. 
**确定压缩策略** + - **system 消息**:保持完整,不做修改 + - 高相关消息:保留 70-90% 内容(精简冗余表达) + - 中等相关消息:保留 30-50% 内容(提炼核心要点) + - 低相关消息:保留 10-20% 内容或删除(仅保留一句话总结) + + --- + + ### 【第二阶段:执行压缩】 + + 基于第一阶段的分析,执行压缩操作: + + **压缩原则**: + 1. **ID 不可变**: 所有 tool_call 的 id 和 tool_call_id 必须原样保留,绝不修改 + 2. **结构完整**: 每个 tool_call 对象必须包含 \`id\`, \`type\`, \`function\` 字段 + 3. **顺序保持**: assistant 的 tool_calls 和对应的 tool 响应按原始顺序出现 + 4. **大幅精简 content**: + - tool 消息的 content:删除冗长描述、重复信息,只保留核心结论和关键数据 + - 合并相似的工具结果(但保留各自的 tool_call_id) + 5. **目标优先**: 围绕任务目标压缩,与目标无关的消息可删除 + + **压缩技巧**: + - 删除:详细过程描述、重复信息、失败尝试、调试日志 + - 保留:具体数据、关键结论、错误信息、链接引用 + - 精简:用"核心发现:A、B、C"代替长篇叙述 + + --- + + ### 【第三阶段:自校验】 + + 输出前,必须检查: + + 1. **ID 一致性校验** + - 每个 assistant 消息中的 tool_calls[i].id 是否有对应的 tool 消息? + - 每个 tool 消息的 tool_call_id 是否能在前面的 assistant 消息中找到? + - 是否所有 ID 都原样保留,没有修改或生成新 ID? + + 2. **压缩比例校验** + - 估算输出的 JSON 字符串长度,是否接近 ${targetTokens} tokens? + - 如果超出目标,需进一步精简 content 字段 + + 3. **格式完整性校验** + - 所有 tool_call 对象是否包含完整的 \`id\`, \`type\`, \`function\` 字段? + - JSON 结构是否正确? + + --- + + ## 输出格式 + + 请按照以下 JSON 格式输出(必须使用 \`\`\`json 代码块): + + \`\`\`json + { + "compressed_messages": [ + {"role": "system", "content": "系统指令(精简后)"}, + {"role": "user", "content": "用户请求"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_原始ID", + "type": "function", + "function": { + "name": "工具名", + "arguments": "{\\"param\\":\\"精简后的值\\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_原始ID", + "content": "工具返回的核心结果(已大幅精简,只保留关键信息)" + } + ], + "compression_summary": "原始${rawTokens}tokens → 约X tokens (压缩比例Y%)。操作:删除了Z条低相关消息,精简了N个工具响应。ID映射关系已验证正确。" + } + \`\`\` + + --- + + ## 压缩示例 + + **示例 1:工具调用压缩** + + 原始(500+ tokens): + \`\`\`json + [ + {"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化完整指南\\",\\"max_results\\":10}"}}]}, + {"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章:\\n1. 标题:Python性能优化完整指南\\n 作者:张三\\n 发布时间:2024-01-15\\n 摘要:本文详细介绍了Python性能优化的各种技巧,包括...(此处省略400字详细内容)\\n URL: https://example.com/article1\\n2. 
标题:..."}
+  ]
+  \`\`\`
+
+  压缩后(100 tokens):
+  \`\`\`json
+  [
+    {"role": "assistant", "tool_calls": [{"id": "call_abc", "type": "function", "function": {"name": "search", "arguments": "{\\"query\\":\\"Python性能优化\\"}"}}]},
+    {"role": "tool", "tool_call_id": "call_abc", "content": "找到10篇文章。核心发现:①Cython可提升30%性能 ②NumPy向量化比循环快10倍 ③使用__slots__节省内存"}
+  ]
+  \`\`\`
+
+  **示例 2:相似内容合并**
+
+  如果有多个相似的搜索结果,可以合并 content,但必须保留各自的 ID 映射。
+
+  ---
+
+  ## 待压缩的对话历史
+
+  ${JSON.stringify(messages, null, 2)}
+
+  ---
+
+  请严格按照三阶段工作流执行,确保 ID 映射关系完全正确,输出接近目标 token 数。`
+  };
+};
diff --git a/packages/service/core/workflow/dispatch/ai/agent/constants.ts b/packages/service/core/workflow/dispatch/ai/agent/constants.ts
index 4ba7cf01b..84ed862dc 100644
--- a/packages/service/core/workflow/dispatch/ai/agent/constants.ts
+++ b/packages/service/core/workflow/dispatch/ai/agent/constants.ts
@@ -5,33 +5,46 @@ import { countPromptTokens } from '../../../../../common/string/tiktoken/index';
 import { createLLMResponse } from '../../../../ai/llm/request';
 import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import { addLog } from '../../../../../common/system/log';
-import { calculateCompressionThresholds } from '../../../../ai/llm/compressionConstants';
+import { calculateCompressionThresholds } from '../../../../ai/llm/compress/constants';
 
-/**
- * 压缩步骤提示词(Depends on)
- * 当 stepPrompt 的 token 长度超过模型最大长度的 15% 时,调用 LLM 压缩到 12%
- */
-const compressStepPrompt = async (
-  stepPrompt: string,
-  model: string,
-  currentDescription: string
-): Promise<string> => {
-  if (!stepPrompt) return stepPrompt;
+export const getMasterAgentSystemPrompt = async ({
+  steps,
+  step,
+  userInput,
+  background = '',
+  model
+}: {
+  steps: AgentPlanStepType[];
+  step: AgentPlanStepType;
+  userInput: string;
+  background?: string;
+  model: string;
+}) => {
+  /**
+   * 压缩步骤提示词(Depends on)
+   * 当 stepPrompt 的 token 长度超过模型最大长度的 15% 时,调用 LLM 压缩到 12%
+   */
+  const compressStepPrompt = async (
+    stepPrompt: string,
+    model: string,
+    currentDescription: string
+  ): Promise<string> => {
+    if (!stepPrompt) return stepPrompt;
 
-  const modelData = getLLMModel(model);
-  if (!modelData) return stepPrompt;
+    const modelData = getLLMModel(model);
+    if (!modelData) return stepPrompt;
 
-  const tokenCount = await countPromptTokens(stepPrompt);
-  const thresholds = calculateCompressionThresholds(modelData.maxContext);
-  const maxTokenThreshold = thresholds.dependsOn.threshold;
+    const tokenCount = await countPromptTokens(stepPrompt);
+    const thresholds = calculateCompressionThresholds(modelData.maxContext);
+    const maxTokenThreshold = thresholds.dependsOn.threshold;
 
-  if (tokenCount <= maxTokenThreshold) {
-    return stepPrompt;
-  }
+    if (tokenCount <= maxTokenThreshold) {
+      return stepPrompt;
+    }
 
-  const targetTokens = thresholds.dependsOn.target;
+    const targetTokens = thresholds.dependsOn.target;
 
-  const compressionSystemPrompt = `
+    const compressionSystemPrompt = `
 你是工作流步骤历史压缩专家,擅长从多个已执行步骤的结果中提取关键信息。
 
 你的任务是对工作流的执行历史进行智能压缩,在保留关键信息的同时,大幅降低 token 消耗。
@@ -89,7 +102,7 @@
 4. 步骤的时序关系是否清晰?
`; - const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。 + const userPrompt = `请对以下工作流步骤的执行历史进行压缩,保留与当前任务最相关的信息。 **当前任务目标**:${currentDescription} @@ -116,46 +129,33 @@ ${stepPrompt} 请直接输出压缩后的步骤历史:`; - try { - const { answerText } = await createLLMResponse({ - body: { - model: modelData, - messages: [ - { - role: ChatCompletionRequestMessageRoleEnum.System, - content: compressionSystemPrompt - }, - { - role: ChatCompletionRequestMessageRoleEnum.User, - content: userPrompt - } - ], - temperature: 0.1, - stream: false - } - }); + try { + const { answerText } = await createLLMResponse({ + body: { + model: modelData, + messages: [ + { + role: ChatCompletionRequestMessageRoleEnum.System, + content: compressionSystemPrompt + }, + { + role: ChatCompletionRequestMessageRoleEnum.User, + content: userPrompt + } + ], + temperature: 0.1, + stream: false + } + }); - return answerText || stepPrompt; - } catch (error) { - console.error('压缩 stepPrompt 失败:', error); - // 压缩失败时返回原始内容 - return stepPrompt; - } -}; + return answerText || stepPrompt; + } catch (error) { + console.error('压缩 stepPrompt 失败:', error); + // 压缩失败时返回原始内容 + return stepPrompt; + } + }; -export const getMasterAgentSystemPrompt = async ({ - steps, - step, - userInput, - background = '', - model -}: { - steps: AgentPlanStepType[]; - step: AgentPlanStepType; - userInput: string; - background?: string; - model: string; -}) => { let stepPrompt = steps .filter((item) => step.depends_on && step.depends_on.includes(item.id)) .map( diff --git a/packages/service/core/workflow/dispatch/ai/agent/index.ts b/packages/service/core/workflow/dispatch/ai/agent/index.ts index 808283370..98554b686 100644 --- a/packages/service/core/workflow/dispatch/ai/agent/index.ts +++ b/packages/service/core/workflow/dispatch/ai/agent/index.ts @@ -32,6 +32,7 @@ import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type import { addLog } from '../../../../../common/system/log'; import { createLLMResponse } from '../../../../ai/llm/request'; import { parseToolArgs } from '../utils'; +import { checkTaskComplexity } from './master/taskComplexity'; export type DispatchAgentModuleProps = ModuleDispatchProps<{ [NodeInputKeyEnum.history]?: ChatItemType[]; @@ -86,7 +87,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise } = props; const agentModel = getLLMModel(model); const chatHistories = getHistories(history, histories); - console.log('userChatInput', userChatInput); + const planMessagesKey = `planMessages-${nodeId}`; const replanMessagesKey = `replanMessages-${nodeId}`; const agentPlanKey = `agentPlan-${nodeId}`; @@ -114,9 +115,11 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise })(); // Plan step: 需要生成 plan,且还没有完整的 plan - const isPlanStep = isPlanAgent && (planHistoryMessages || !agentPlan); + const isPlanStep = isPlanAgent && planHistoryMessages; // Replan step: 已有 plan,且有 replan 历史消息 const isReplanStep = isPlanAgent && agentPlan && replanMessages; + // Check task complexity: 第一次进入任务时候进行判断。(有 plan了,说明已经开始执行任务了) + const isCheckTaskComplexityStep = !agentPlan && !isPlanStep; try { // Get files @@ -138,14 +141,96 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise filesMap }); - const planCallFn = async () => { - // Confirm 操作 - console.log(lastInteractive, interactiveInput, '\n Plan step'); - if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) { - planHistoryMessages = undefined; - } else { + /* ===== Check task 
complexity ===== */
+    const {
+      complex: taskIsComplexity,
+      inputTokens: taskComplexInputTokens,
+      outputTokens: taskComplexOutputTokens
+    } = await (async () => {
+      if (isCheckTaskComplexityStep) {
+        return await checkTaskComplexity({
+          model,
+          userChatInput
+        });
+      }
+
+      // 多轮运行时候,代表都是进入复杂流程
+      return {
+        complex: true,
+        inputTokens: 0,
+        outputTokens: 0
+      };
+    })();
+
+    if (taskIsComplexity) {
+      /* ===== Plan Agent ===== */
+      const planCallFn = async () => {
+        // Confirm 操作
+        console.log(lastInteractive, interactiveInput, '\n Plan step');
+        // 点了确认。此时肯定有 agentPlans
+        if (
+          lastInteractive?.type === 'agentPlanCheck' &&
+          interactiveInput === ConfirmPlanAgentText &&
+          agentPlan
+        ) {
+          planHistoryMessages = undefined;
+        } else {
+          // 临时代码
+          const tmpText = '正在进行规划生成...\n';
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.answer,
+            data: textAdaptGptResponse({
+              text: tmpText
+            })
+          });
+
+          const { answerText, plan, completeMessages, usages, interactiveResponse } =
+            await dispatchPlanAgent({
+              historyMessages: planHistoryMessages || [],
+              userInput: lastInteractive ? interactiveInput : userChatInput,
+              interactive: lastInteractive,
+              subAppList,
+              getSubAppInfo,
+              systemPrompt,
+              model,
+              temperature,
+              top_p: aiChatTopP,
+              stream,
+              isTopPlanAgent: workflowDispatchDeep === 1
+            });
+
+          const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.answer,
+            data: textAdaptGptResponse({
+              text
+            })
+          });
+
+          agentPlan = plan;
+
+          // TODO: usage 合并
+          // Sub agent plan 不会有交互响应。Top agent plan 肯定会有。
+          if (interactiveResponse) {
+            return {
+              [DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`,
+              [DispatchNodeResponseKeyEnum.memories]: {
+                [planMessagesKey]: filterMemoryMessages(completeMessages),
+                [agentPlanKey]: agentPlan
+              },
+              [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse
+            };
+          } else {
+            planHistoryMessages = undefined;
+          }
+        }
+      };
+      const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => {
+        if (!agentPlan) return;
+
+        addLog.debug(`Replan step`);
         // 临时代码
-        const tmpText = '正在进行规划生成...\n';
+        const tmpText = '\n # 正在重新进行规划生成...\n';
         workflowStreamResponse?.({
           event: SseResponseEventEnum.answer,
           data: textAdaptGptResponse({
             text: tmpText
           })
         });
 
-      const { answerText, plan, completeMessages, usages, interactiveResponse } =
-        await dispatchPlanAgent({
-          historyMessages: planHistoryMessages || [],
-          userInput: lastInteractive ? interactiveInput : userChatInput,
-          interactive: lastInteractive,
-          subAppList,
-          getSubAppInfo,
-          systemPrompt,
-          model,
-          temperature,
-          top_p: aiChatTopP,
-          stream,
-          isTopPlanAgent: workflowDispatchDeep === 1
-        });
+        const {
+          answerText,
+          plan: rePlan,
+          completeMessages,
+          usages,
+          interactiveResponse
+        } = await dispatchReplanAgent({
+          historyMessages: replanMessages || [],
+          userInput: lastInteractive ? interactiveInput : userChatInput,
+          plan,
+          interactive: lastInteractive,
+          subAppList,
+          getSubAppInfo,
+          systemPrompt,
+          model,
+          temperature,
+          top_p: aiChatTopP,
+          stream,
+          isTopPlanAgent: workflowDispatchDeep === 1
+        });
 
-      const text = `${answerText}${plan ? `\n\`\`\`json\n${JSON.stringify(plan, null, 2)}\n\`\`\`` : ''}`;
+        if (rePlan) {
+          agentPlan.steps.push(...rePlan.steps);
+          agentPlan.replan = rePlan.replan;
+        }
+
+        const text = `${answerText}${agentPlan ? 
`\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`; workflowStreamResponse?.({ event: SseResponseEventEnum.answer, data: textAdaptGptResponse({ @@ -176,209 +272,41 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise }) }); - agentPlan = plan; - // TODO: usage 合并 // Sub agent plan 不会有交互响应。Top agent plan 肯定会有。 if (interactiveResponse) { return { [DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`, [DispatchNodeResponseKeyEnum.memories]: { - [planMessagesKey]: filterMemoryMessages(completeMessages), + [replanMessagesKey]: filterMemoryMessages(completeMessages), [agentPlanKey]: agentPlan }, [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse }; } else { - planHistoryMessages = undefined; + replanMessages = undefined; } - } - }; - const replanCallFn = async ({ plan }: { plan: AgentPlanType }) => { - if (!agentPlan) return; + }; - addLog.debug(`Replan step`); - // 临时代码 - const tmpText = '\n # 正在重新进行规划生成...\n'; - workflowStreamResponse?.({ - event: SseResponseEventEnum.answer, - data: textAdaptGptResponse({ - text: tmpText - }) - }); - - const { - answerText, - plan: rePlan, - completeMessages, - usages, - interactiveResponse - } = await dispatchReplanAgent({ - historyMessages: replanMessages || [], - userInput: lastInteractive ? interactiveInput : userChatInput, - plan, - interactive: lastInteractive, - subAppList, - getSubAppInfo, - systemPrompt, - model, - temperature, - top_p: aiChatTopP, - stream, - isTopPlanAgent: workflowDispatchDeep === 1 - }); - - if (rePlan) { - agentPlan.steps.push(...rePlan.steps); - agentPlan.replan = rePlan.replan; - } - - const text = `${answerText}${agentPlan ? `\n\`\`\`json\n${JSON.stringify(agentPlan, null, 2)}\n\`\`\`\n` : ''}`; - workflowStreamResponse?.({ - event: SseResponseEventEnum.answer, - data: textAdaptGptResponse({ - text - }) - }); - - // TODO: usage 合并 - // Sub agent plan 不会有交互响应。Top agent plan 肯定会有。 - if (interactiveResponse) { - return { - [DispatchNodeResponseKeyEnum.answerText]: `${tmpText}${text}`, - [DispatchNodeResponseKeyEnum.memories]: { - [planMessagesKey]: filterMemoryMessages(completeMessages), - [agentPlanKey]: agentPlan - }, - [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse - }; - } else { - replanMessages = undefined; - } - }; - - /** - * 检测问题复杂度 - * @returns true: 复杂问题,需要正常规划流程; false: 简单问题,已构造简单 plan - */ - const checkQuestionComplexity = async (): Promise => { - addLog.debug('Checking if question is simple...'); - - const simpleCheckPrompt = `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。 - -用户显式意图 (User Explicit Intent): -用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括: -* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...” -* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...” -* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...” -* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。 - -评估框架 (Assessment Framework): -* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括: - * **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。 - * **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。 - * **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。 - * **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 
(Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。 -* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括: - * **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。 - * **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。 - * **推理与判断 (Reasoning and Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。 - * **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。 - * ** - * **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。 -待评估用户问题 (User Query): ${userChatInput} - -输出规范 (Output Specification): -请严格遵循以下 JSON 格式输出您的评估结果: -\`\`\`json -{ - "complex": true/false, - "reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。" -} -\`\`\` - -`; - - try { - const { answerText: checkResult } = await createLLMResponse({ - body: { - model: agentModel.model, - temperature: 0.1, - messages: [ - { - role: 'system', - content: simpleCheckPrompt - }, - { - role: 'user', - content: userChatInput - } - ] - } - }); - - const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult); - - if (checkResponse && !checkResponse.complex) { - // 构造一个简单的 plan,包含一个直接回答的 step - agentPlan = { - task: userChatInput, - steps: [ - { - id: 'Simple-Answer', - title: '回答问题', - description: `直接回答用户问题:${userChatInput}`, - response: undefined - } - ], - replan: false - }; - - workflowStreamResponse?.({ - event: SseResponseEventEnum.answer, - data: textAdaptGptResponse({ - text: `检测到简单问题,直接回答中...\n` - }) - }); - - return false; // 简单问题 - } else { - return true; // 复杂问题 - } - } catch (error) { - addLog.error('Simple question check failed, proceeding with normal plan flow', error); - return true; // 出错时默认走复杂流程 - } - }; - - /* ===== Plan Agent ===== */ - if (isPlanStep) { - // 如果是用户确认 plan 的交互,直接调用 planCallFn,不需要再检测复杂度 - if (lastInteractive?.type === 'agentPlanCheck' && interactiveInput === ConfirmPlanAgentText) { + // 执行 Plan/replan + if (isPlanStep) { const result = await planCallFn(); + // 有 result 代表 plan 有交互响应(check/ask) + if (result) return result; + } else if (isReplanStep) { + const result = await replanCallFn({ + plan: agentPlan! + }); if (result) return result; - } else { - // 非交互确认的情况下,先检测问题复杂度 - const isComplex = await checkQuestionComplexity(); - - if (isComplex) { - const result = await planCallFn(); - if (result) return result; - } } - } else if (isReplanStep) { - const result = await replanCallFn({ - plan: agentPlan! 
+ + addLog.debug(`Start master agent`, { + agentPlan: JSON.stringify(agentPlan, null, 2) }); - if (result) return result; - } - addLog.debug(`Start master agent`, { - agentPlan: JSON.stringify(agentPlan, null, 2) - }); + /* ===== Master agent, 逐步执行 plan ===== */ + if (!agentPlan) return Promise.reject('没有 plan'); - /* ===== Master agent, 逐步执行 plan ===== */ - if (agentPlan) { let [inputTokens, outputTokens, subAppUsages, assistantResponses]: [ number, number, @@ -386,7 +314,7 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise AIChatItemValueItemType[] ] = [0, 0, [], []]; - while (agentPlan?.steps!.filter((item) => !item.response)!.length) { + while (agentPlan.steps!.filter((item) => !item.response)!.length) { const pendingSteps = agentPlan?.steps!.filter((item) => !item.response)!; for await (const step of pendingSteps) { @@ -468,21 +396,10 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise ...subAppUsages ] }; - } else { - // TODO: 没有 plan - console.log('没有 plan'); - - return { - // 目前 Master 不会触发交互 - // [DispatchNodeResponseKeyEnum.interactive]: interactiveResponse, - // TODO: 需要对 memoryMessages 单独建表存储 - [DispatchNodeResponseKeyEnum.memories]: { - [agentPlanKey]: agentPlan - }, - [DispatchNodeResponseKeyEnum.nodeResponse]: {}, - [DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [] - }; } + + // 简单 tool call 模式(一轮对话就结束了,不会多轮,所以不会受到连续对话的 taskIsComplexity 影响) + return Promise.reject('目前未支持简单模式'); } catch (error) { return getNodeErrResponse({ error }); } diff --git a/packages/service/core/workflow/dispatch/ai/agent/master/taskComplexity.ts b/packages/service/core/workflow/dispatch/ai/agent/master/taskComplexity.ts new file mode 100644 index 000000000..c7cc291f8 --- /dev/null +++ b/packages/service/core/workflow/dispatch/ai/agent/master/taskComplexity.ts @@ -0,0 +1,84 @@ +import { createLLMResponse } from '../../../../../ai/llm/request'; +import { parseToolArgs } from '../../utils'; +import { addLog } from '../../../../../../common/system/log'; + +const getPrompt = ({ + userChatInput +}: { + userChatInput: string; +}) => `你是一位资深的认知复杂度评估专家 (Cognitive Complexity Assessment Specialist)。 您的职责是对用户提出的任务请求进行深度解析,精准判断其内在的认知复杂度层级,并据此决定是否需要启动多步骤规划流程。 + +用户显式意图 (User Explicit Intent): +用户可能会在问题中明确表达其期望的回答方式或处理深度。 常见的意图类型包括: +* **快速回答 / 简单回答 (Quick/Simple Answer)**:用户期望得到简洁、直接的答案,无需深入分析或详细解释。 例如:“请简单回答...”、“快速告诉我...” +* **深度思考 / 详细分析 (Deep Thinking/Detailed Analysis)**:用户期望得到深入、全面的分析,包括多角度的思考、证据支持和详细的解释。 例如:“请深入分析...”、“详细解释...” +* **创造性方案 / 创新性建议 (Creative Solution/Innovative Suggestion)**:用户期望得到具有创新性的解决方案或建议,可能需要进行发散性思维和方案设计。 例如:“请提出一个创新的方案...”、“提供一些有创意的建议...” +* **无明确意图 (No Explicit Intent)**:用户没有明确表达其期望的回答方式或处理深度。 + +评估框架 (Assessment Framework): +* **低复杂度任务 (Low Complexity - \`complex: false\`)**: 此类任务具备高度的直接性和明确性,通常仅需调用单一工具或执行简单的操作即可完成。 其特征包括: +* **直接工具可解性 (Direct Tool Solvability)**:任务目标明确,可直接映射到特定的工具功能。 +* **信息可得性 (Information Accessibility)**:所需信息易于获取,无需复杂的搜索或推理。 +* **操作单一性 (Operational Singularity)**:任务执行路径清晰,无需多步骤协同。 +* **典型示例 (Typical Examples)**:信息检索 (Information Retrieval)、简单算术计算 (Simple Arithmetic Calculation)、事实性问题解答 (Factual Question Answering)、目标明确的单一指令执行 (Single, Well-Defined Instruction Execution)。 +* **高复杂度任务 (High Complexity - \'complex: true\')**: 此类任务涉及复杂的认知过程,需要进行多步骤规划、工具组合、深入分析和创造性思考才能完成。 其特征包括: +* **意图模糊性 (Intent Ambiguity)**:用户意图不明确,需要进行意图消歧 (Intent Disambiguation) 或目标细化 (Goal Refinement)。 +* **信息聚合需求 (Information Aggregation Requirement)**:需要整合来自多个信息源的数据,进行综合分析。 +* **推理与判断 (Reasoning and 
Judgement)**:需要进行逻辑推理、情境分析、价值判断等认知操作。 +* **创造性与探索性 (Creativity and Exploration)**:需要进行发散性思维、方案设计、假设验证等探索性活动。 +* ** +* **典型示例 (Typical Examples)**:意图不明确的请求 (Ambiguous Requests)、需要综合多个信息源的任务 (Tasks Requiring Information Synthesis from Multiple Sources)、需要复杂推理或创造性思考的问题 (Problems Requiring Complex Reasoning or Creative Thinking)。 +待评估用户问题 (User Query): ${userChatInput} + +输出规范 (Output Specification): +请严格遵循以下 JSON 格式输出您的评估结果: +\`\`\`json +{ +"complex": true/false, +"reason": "对任务认知复杂度的详细解释,说明判断的理由,并引用上述评估框架中的相关概念。" +} +\`\`\` + +`; + +export const checkTaskComplexity = async ({ + model, + userChatInput +}: { + model: string; + userChatInput: string; +}) => { + try { + const { answerText: checkResult, usage } = await createLLMResponse({ + body: { + model, + temperature: 0.1, + messages: [ + { + role: 'system', + content: getPrompt({ userChatInput }) + }, + { + role: 'user', + content: userChatInput + } + ] + } + }); + + const checkResponse = parseToolArgs<{ complex: boolean; reason: string }>(checkResult); + + return { + complex: !!checkResponse?.complex, + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens + }; + } catch (error) { + addLog.error('Simple question check failed, proceeding with normal plan flow', error); + return { + complex: true, + inputTokens: 0, + outputTokens: 0 + }; + } +}; diff --git a/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts b/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts index 41023420e..2a4d50f2b 100644 --- a/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts +++ b/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts @@ -6,7 +6,8 @@ import { createLLMResponse } from '../../../../../../ai/llm/request'; import { getPlanAgentSystemPrompt, getReplanAgentSystemPrompt, - getReplanAgentUserPrompt + getReplanAgentUserPrompt, + getUserContent } from './prompt'; import { getLLMModel } from '../../../../../../ai/model'; import { formatModelChars2Points } from '../../../../../../../support/wallet/usage/utils'; @@ -80,6 +81,8 @@ export const dispatchPlanAgent = async ({ // 分类:query/user select/user form const lastMessages = requestMessages[requestMessages.length - 1]; console.log('user input:', userInput); + + // 上一轮是 Ask 模式,进行工具调用拼接 if ( (interactive?.type === 'agentPlanAskUserSelect' || interactive?.type === 'agentPlanAskQuery') && lastMessages.role === 'assistant' && @@ -90,20 +93,15 @@ export const dispatchPlanAgent = async ({ tool_call_id: lastMessages.tool_calls[0].id, content: userInput }); + // TODO: 是否合理 requestMessages.push({ role: 'assistant', content: '请基于以上收集的用户信息,重新生成完整的计划,严格按照 JSON Schema 输出。' }); } else { - let userContent = `任务描述:${userInput}`; - - if (systemPrompt) { - userContent += `\n\n背景信息:${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划,优先遵循用户的步骤安排和偏好。`; - } - console.log('userContent:', userInput); requestMessages.push({ role: 'user', - content: userContent + content: getUserContent({ userInput, systemPrompt, getSubAppInfo }) }); } diff --git a/packages/service/core/workflow/dispatch/ai/agent/sub/plan/prompt.ts b/packages/service/core/workflow/dispatch/ai/agent/sub/plan/prompt.ts index b952cc07f..b7a81d5f4 100644 --- a/packages/service/core/workflow/dispatch/ai/agent/sub/plan/prompt.ts +++ b/packages/service/core/workflow/dispatch/ai/agent/sub/plan/prompt.ts @@ -241,6 +241,22 @@ export const getPlanAgentSystemPrompt = ({ `; }; +export const getUserContent = ({ + userInput, + systemPrompt, + getSubAppInfo +}: { + userInput: string; + 
systemPrompt?: string; + getSubAppInfo: GetSubAppInfoFnType; +}) => { + let userContent = `任务描述:${userInput}`; + if (systemPrompt) { + userContent += `\n\n背景信息:${parseSystemPrompt({ systemPrompt, getSubAppInfo })}\n请按照用户提供的背景信息来重新生成计划,优先遵循用户的步骤安排和偏好。`; + } + return userContent; +}; + export const getReplanAgentSystemPrompt = ({ getSubAppInfo, subAppList diff --git a/packages/web/components/common/Textarea/PromptEditor/utils.ts b/packages/web/components/common/Textarea/PromptEditor/utils.ts index 36a1cafc0..2cee589e3 100644 --- a/packages/web/components/common/Textarea/PromptEditor/utils.ts +++ b/packages/web/components/common/Textarea/PromptEditor/utils.ts @@ -506,6 +506,11 @@ export const editorStateToText = (editor: LexicalEditor) => { return node.variableKey || ''; } + // Handle skill nodes + if (node.type === 'skill') { + return `{{@${node.id}@}}`; + } + // Handle paragraph nodes - recursively process children if (node.type === 'paragraph') { if (!node.children || node.children.length === 0) { @@ -563,17 +568,6 @@ export const editorStateToText = (editor: LexicalEditor) => { children.forEach((child) => { const val = extractText(child); paragraphText.push(val); - if (child.type === 'linebreak') { - paragraphText.push('\n'); - } else if (child.type === 'text') { - paragraphText.push(child.text); - } else if (child.type === 'tab') { - paragraphText.push(' '); - } else if (child.type === 'variableLabel' || child.type === 'Variable') { - paragraphText.push(child.variableKey); - } else if (child.type === 'skill') { - paragraphText.push(`{{@${child.id}@}}`); - } }); const finalText = paragraphText.join('');