diff --git a/packages/service/core/ai/llm/agentCall.ts b/packages/service/core/ai/llm/agentCall.ts
index a20d517a0..1ca5357c2 100644
--- a/packages/service/core/ai/llm/agentCall.ts
+++ b/packages/service/core/ai/llm/agentCall.ts
@@ -98,7 +98,10 @@ export const runAgentCall = async ({
     // Compress the requestMessages used for the request
     const taskDescription = currentStep.description || currentStep.title;
     if (taskDescription) {
-      requestMessages = await compressRequestMessages(requestMessages, model, taskDescription);
+      const result = await compressRequestMessages(requestMessages, model, taskDescription);
+      requestMessages = result.messages;
+      inputTokens += result.usage?.inputTokens || 0;
+      outputTokens += result.usage?.outputTokens || 0;
     }
 
     // Request LLM
diff --git a/packages/service/core/ai/llm/compress/index.ts b/packages/service/core/ai/llm/compress/index.ts
index 2c9928756..bf76750ea 100644
--- a/packages/service/core/ai/llm/compress/index.ts
+++ b/packages/service/core/ai/llm/compress/index.ts
@@ -6,6 +6,8 @@ import { createLLMResponse } from '../request';
 import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
 import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
 import { getCompressRequestMessagesPrompt } from './prompt';
+import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
+import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
 
 /**
  * Compress a single oversized tool response
@@ -210,8 +212,14 @@ export const compressRequestMessages = async (
   messages: ChatCompletionMessageParam[],
   model: LLMModelItemType,
   currentDescription: string
-): Promise<ChatCompletionMessageParam[]> => {
-  if (!messages || messages.length === 0) return messages;
+): Promise<{
+  messages: ChatCompletionMessageParam[];
+  usage?: ChatNodeUsageType;
+}> => {
+  if (!messages || messages.length === 0)
+    return {
+      messages
+    };
 
   const tokenCount = await countGptMessagesTokens(messages);
   const thresholds = calculateCompressionThresholds(model.maxContext);
@@ -225,7 +233,9 @@
 
   if (tokenCount <= maxTokenThreshold) {
     console.log('messages 无需压缩,共', messages.length, '条消息');
-    return messages;
+    return {
+      messages
+    };
   }
 
   addLog.info('Start compressing agent messages', {
@@ -243,7 +253,7 @@
   const userPrompt = '请执行压缩操作,严格按照JSON格式返回结果。';
 
   try {
-    const { answerText } = await createLLMResponse({
+    const { answerText, usage } = await createLLMResponse({
       body: {
         model,
         messages: [
@@ -263,22 +273,38 @@
 
     if (!answerText) {
       addLog.warn('Compression failed: empty response, return original messages');
-      return messages;
+      return { messages };
     }
 
+    const { totalPoints, modelName } = formatModelChars2Points({
+      model: model.model,
+      inputTokens: usage.inputTokens,
+      outputTokens: usage.outputTokens
+    });
+    const compressedUsage = {
+      moduleName: 'Agent 对话历史压缩',
+      model: modelName,
+      totalPoints,
+      inputTokens: usage.inputTokens,
+      outputTokens: usage.outputTokens
+    };
+
     const jsonMatch =
       answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
     if (!jsonMatch) {
       addLog.warn('Compression failed: cannot parse JSON, return original messages');
-      return messages;
+      return { messages, usage: compressedUsage };
     }
 
     const jsonText = jsonMatch[1] || jsonMatch[0];
-    const parsed = JSON.parse(jsonText);
+    const parsed = JSON.parse(jsonText) as {
+      compressed_messages: ChatCompletionMessageParam[];
+      compression_summary: string;
+    };
 
     if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
       addLog.warn('Compression failed: invalid format, return original messages');
-      return messages;
+      return { messages, usage: compressedUsage };
     }
 
     const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
@@ -289,9 +315,12 @@
       summary: parsed.compression_summary
     });
 
-    return parsed.compressed_messages as ChatCompletionMessageParam[];
+    return {
+      messages: parsed.compressed_messages,
+      usage: compressedUsage
+    };
   } catch (error) {
     addLog.error('Compression failed', error);
-    return messages;
+    return { messages };
   }
 };
diff --git a/packages/service/core/workflow/dispatch/ai/agent/index.ts b/packages/service/core/workflow/dispatch/ai/agent/index.ts
index 1057a86aa..0580f1331 100644
--- a/packages/service/core/workflow/dispatch/ai/agent/index.ts
+++ b/packages/service/core/workflow/dispatch/ai/agent/index.ts
@@ -142,16 +142,16 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
 
   /* ===== Check task complexity ===== */
   const taskIsComplexity = await (async () => {
-    if (isCheckTaskComplexityStep) {
-      const res = await checkTaskComplexity({
-        model,
-        userChatInput
-      });
-      if (res.usage) {
-        usagePush([res.usage]);
-      }
-      return res.complex;
-    }
+    // if (isCheckTaskComplexityStep) {
+    //   const res = await checkTaskComplexity({
+    //     model,
+    //     userChatInput
+    //   });
+    //   if (res.usage) {
+    //     usagePush([res.usage]);
+    //   }
+    //   return res.complex;
+    // }
 
     // In multi-turn runs, every run is treated as the complex flow
     return true;
diff --git a/packages/service/core/workflow/dispatch/ai/agent/master/call.ts b/packages/service/core/workflow/dispatch/ai/agent/master/call.ts
index e9336e635..9b50e24d6 100644
--- a/packages/service/core/workflow/dispatch/ai/agent/master/call.ts
+++ b/packages/service/core/workflow/dispatch/ai/agent/master/call.ts
@@ -62,15 +62,17 @@ export const stepCall = async ({
   } = props;
 
   // Get depends on step ids
-  const { depends, usage: dependsUsage } = await getStepDependon({
-    model,
-    steps,
-    step
-  });
-  if (dependsUsage) {
-    usagePush([dependsUsage]);
+  if (!step.depends_on) {
+    const { depends, usage: dependsUsage } = await getStepDependon({
+      model,
+      steps,
+      step
+    });
+    if (dependsUsage) {
+      usagePush([dependsUsage]);
+    }
+    step.depends_on = depends;
   }
-  step.depends_on = depends;
 
   // addLog.debug(`Step information`, steps);
   const systemPromptContent = await getMasterAgentSystemPrompt({
diff --git a/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts b/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts
index 548a345bb..e71fae964 100644
--- a/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts
+++ b/packages/service/core/workflow/dispatch/ai/agent/sub/plan/index.ts
@@ -230,17 +230,6 @@ export const dispatchReplanAgent = async ({
 }): Promise => {
   const modelData = getLLMModel(model);
-  // Get the steps the replan depends on
-  const { depends, usage: dependsUsage } = await getStepDependon({
-    model,
-    steps: plan.steps,
-    step: {
-      id: '',
-      title: '重新规划决策依据:需要依赖哪些步骤的判断',
-      description: '本步骤分析先前的执行结果,以确定重新规划时需要依赖哪些特定步骤。'
-    }
-  });
-  const replanSteps = plan.steps.filter((step) => depends.includes(step.id));
 
   const requestMessages: ChatCompletionMessageParam[] = [
     {
       role: 'system',
@@ -265,11 +254,25 @@
       tool_call_id: lastMessages.tool_calls[0].id,
       content: userInput
     });
+    // TODO: confirm whether this is correct
     requestMessages.push({
       role: 'assistant',
       content: '请基于以上收集的用户信息,对 PLAN 进行重新规划,并严格按照 JSON Schema 输出。'
     });
   } else {
+    // Get the steps the replan depends on
+    const { depends, usage: dependsUsage } = await getStepDependon({
+      model,
+      steps: plan.steps,
+      step: {
+        id: '',
+        title: '重新规划决策依据:需要依赖哪些步骤的判断',
+        description: '本步骤分析先前的执行结果,以确定重新规划时需要依赖哪些特定步骤。'
+      }
+    });
+    // TODO: push the usage
+    const replanSteps = plan.steps.filter((step) => depends.includes(step.id));
+
     requestMessages.push({
       role: 'user',
       // Build the user input from the required replanSteps
@@ -306,15 +309,13 @@
   if (!answerText && !toolCalls.length) {
     return Promise.reject(getEmptyResponseTip());
   }
 
-  console.log(JSON.stringify({ answerText, toolCalls }, null, 2), 'Replan response');
+
   /* Expected outputs:
     1. text: a plan is generated normally
    2. toolCall: the ask tool is called
    3. text + toolCall: a plan may be generated together with an ask tool call
   */
-
-  // Get the generated plan
   const rePlan = (() => {
     if (!answerText) {
       return;
@@ -345,6 +346,7 @@
         }
       };
     } else {
+      console.log(JSON.stringify({ answerText, toolCalls }, null, 2), 'Replan response');
       return {
         type: 'agentPlanAskQuery',
         params: {
@@ -353,6 +355,9 @@
       };
     }
   }
+
+    // If the replan produces no proactive interaction, force the check interaction
+    return PlanCheckInteractive;
   })();
 
   const { totalPoints, modelName } = formatModelChars2Points({
diff --git a/packages/service/core/workflow/dispatch/index.ts b/packages/service/core/workflow/dispatch/index.ts
index d013888c8..133592753 100644
--- a/packages/service/core/workflow/dispatch/index.ts
+++ b/packages/service/core/workflow/dispatch/index.ts
@@ -484,7 +484,7 @@ export const runWorkflow = async (data: RunWorkflowProps): Promise
         ...data,
         mcpClientMemory,
-        usagePush: this.usagePush,
+        usagePush: this.usagePush.bind(this),
         lastInteractive: data.lastInteractive?.entryNodeIds?.includes(node.nodeId)
           ? data.lastInteractive
           : undefined,