fix: usage

archer 2025-11-11 12:26:51 +08:00
parent dd6b387d83
commit 7e8d104f53
No known key found for this signature in database
GPG Key ID: 4446499B846D4A9E
6 changed files with 83 additions and 44 deletions

View File

@@ -98,7 +98,10 @@ export const runAgentCall = async ({
// Compress the requestMessages before sending the request
const taskDescription = currentStep.description || currentStep.title;
if (taskDescription) {
requestMessages = await compressRequestMessages(requestMessages, model, taskDescription);
const result = await compressRequestMessages(requestMessages, model, taskDescription);
requestMessages = result.messages;
inputTokens += result.usage?.inputTokens || 0;
outputTokens += result.usage?.outputTokens || 0;
}
// Request LLM
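
Since this commit is about usage accounting, here is a hedged sketch of how token counters accumulated like this are typically settled into billing points, reusing the formatModelChars2Points helper that appears in the next file; the settleAgentUsage wrapper and its moduleName value are illustrative, not part of the codebase.

// Illustrative only: convert accumulated token counters into billable points
const settleAgentUsage = (modelKey: string, inputTokens: number, outputTokens: number) => {
  const { totalPoints, modelName } = formatModelChars2Points({
    model: modelKey,
    inputTokens,
    outputTokens
  });
  // shape mirrors the compressedUsage object built in the compression module below
  return { moduleName: 'Agent', model: modelName, totalPoints, inputTokens, outputTokens };
};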

View File

@@ -6,6 +6,8 @@ import { createLLMResponse } from '../request';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { getCompressRequestMessagesPrompt } from './prompt';
import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
/**
* Compress a single oversized tool response
@@ -210,8 +212,14 @@ export const compressRequestMessages = async (
messages: ChatCompletionMessageParam[],
model: LLMModelItemType,
currentDescription: string
): Promise<ChatCompletionMessageParam[]> => {
if (!messages || messages.length === 0) return messages;
): Promise<{
messages: ChatCompletionMessageParam[];
usage?: ChatNodeUsageType;
}> => {
if (!messages || messages.length === 0)
return {
messages
};
const tokenCount = await countGptMessagesTokens(messages);
const thresholds = calculateCompressionThresholds(model.maxContext);
@@ -225,7 +233,9 @@ export const compressRequestMessages = async (
if (tokenCount <= maxTokenThreshold) {
console.log('messages 无需压缩,共', messages.length, '条消息');
return messages;
return {
messages
};
}
addLog.info('Start compressing agent messages', {
@@ -243,7 +253,7 @@ export const compressRequestMessages = async (
const userPrompt = '请执行压缩操作严格按照JSON格式返回结果。';
try {
const { answerText } = await createLLMResponse({
const { answerText, usage } = await createLLMResponse({
body: {
model,
messages: [
@@ -263,22 +273,38 @@ export const compressRequestMessages = async (
if (!answerText) {
addLog.warn('Compression failed: empty response, return original messages');
return messages;
return { messages };
}
const { totalPoints, modelName } = formatModelChars2Points({
model: model.model,
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens
});
const compressedUsage = {
moduleName: 'Agent 对话历史压缩',
model: modelName,
totalPoints,
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens
};
const jsonMatch =
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
addLog.warn('Compression failed: cannot parse JSON, return original messages');
return messages;
return { messages, usage: compressedUsage };
}
const jsonText = jsonMatch[1] || jsonMatch[0];
const parsed = JSON.parse(jsonText);
const parsed = JSON.parse(jsonText) as {
compressed_messages: ChatCompletionMessageParam[];
compression_summary: string;
};
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
addLog.warn('Compression failed: invalid format, return original messages');
return messages;
return { messages, usage: compressedUsage };
}
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
@@ -289,9 +315,12 @@ export const compressRequestMessages = async (
summary: parsed.compression_summary
});
return parsed.compressed_messages as ChatCompletionMessageParam[];
return {
messages: parsed.compressed_messages,
usage: compressedUsage
};
} catch (error) {
addLog.error('Compression failed', error);
return messages;
return { messages };
}
};
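
For reference, a sketch of the usage object this function now returns when the compression LLM call actually runs; the field list simply mirrors the compressedUsage literal above, and the CompressionUsage alias is an assumed, illustrative name (the real type is ChatNodeUsageType).

// Assumed shape, mirroring compressedUsage above; illustrative alias only
type CompressionUsage = {
  moduleName: string;   // e.g. 'Agent 对话历史压缩'
  model: string;        // resolved model name from formatModelChars2Points
  totalPoints: number;  // billable points for the compression call
  inputTokens: number;
  outputTokens: number;
};

Callers should treat usage as optional: it is absent when no compression was needed or when the LLM call itself threw, and present (even if parsing the answer failed) once tokens were actually consumed.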

View File

@@ -142,16 +142,16 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
/* ===== Check task complexity ===== */
const taskIsComplexity = await (async () => {
if (isCheckTaskComplexityStep) {
const res = await checkTaskComplexity({
model,
userChatInput
});
if (res.usage) {
usagePush([res.usage]);
}
return res.complex;
}
// if (isCheckTaskComplexityStep) {
// const res = await checkTaskComplexity({
// model,
// userChatInput
// });
// if (res.usage) {
// usagePush([res.usage]);
// }
// return res.complex;
// }
// On multi-turn runs, always enter the complex flow
return true;

View File

@@ -62,15 +62,17 @@ export const stepCall = async ({
} = props;
// Get depends on step ids
const { depends, usage: dependsUsage } = await getStepDependon({
model,
steps,
step
});
if (dependsUsage) {
usagePush([dependsUsage]);
if (!step.depends_on) {
const { depends, usage: dependsUsage } = await getStepDependon({
model,
steps,
step
});
if (dependsUsage) {
usagePush([dependsUsage]);
}
step.depends_on = depends;
}
step.depends_on = depends;
// addLog.debug(`Step information`, steps);
const systemPromptContent = await getMasterAgentSystemPrompt({

View File

@@ -230,17 +230,6 @@ export const dispatchReplanAgent = async ({
}): Promise<DispatchPlanAgentResponse> => {
const modelData = getLLMModel(model);
// Get the dependent steps
const { depends, usage: dependsUsage } = await getStepDependon({
model,
steps: plan.steps,
step: {
id: '',
title: '重新规划决策依据:需要依赖哪些步骤的判断',
description: '本步骤分析先前的执行结果,以确定重新规划时需要依赖哪些特定步骤。'
}
});
const replanSteps = plan.steps.filter((step) => depends.includes(step.id));
const requestMessages: ChatCompletionMessageParam[] = [
{
role: 'system',
@@ -265,11 +254,25 @@ export const dispatchReplanAgent = async ({
tool_call_id: lastMessages.tool_calls[0].id,
content: userInput
});
// TODO: confirm whether there is an issue here
requestMessages.push({
role: 'assistant',
content: '请基于以上收集的用户信息,对 PLAN 进行重新规划,并严格按照 JSON Schema 输出。'
});
} else {
// Get the dependent steps
const { depends, usage: dependsUsage } = await getStepDependon({
model,
steps: plan.steps,
step: {
id: '',
title: '重新规划决策依据:需要依赖哪些步骤的判断',
description: '本步骤分析先前的执行结果,以确定重新规划时需要依赖哪些特定步骤。'
}
});
// TODO: push usage
const replanSteps = plan.steps.filter((step) => depends.includes(step.id));
requestMessages.push({
role: 'user',
// Generate the user input based on the required replanSteps
@@ -306,15 +309,13 @@ export const dispatchReplanAgent = async ({
if (!answerText && !toolCalls.length) {
return Promise.reject(getEmptyResponseTip());
}
console.log(JSON.stringify({ answerText, toolCalls }, null, 2), 'Replan response');
/*
1. text: a plan is generated normally
2. toolCall: the ask tool is called
3. text + toolCall: possibly a plan plus an ask tool call
*/
// Get the generated plan
const rePlan = (() => {
if (!answerText) {
return;
@@ -345,6 +346,7 @@ export const dispatchReplanAgent = async ({
}
};
} else {
console.log(JSON.stringify({ answerText, toolCalls }, null, 2), 'Replan response');
return {
type: 'agentPlanAskQuery',
params: {
@@ -353,6 +355,9 @@ export const dispatchReplanAgent = async ({
};
}
}
// If RePlan made no proactive interaction, force-trigger the check
return PlanCheckInteractive;
})();
const { totalPoints, modelName } = formatModelChars2Points({

View File

@@ -484,7 +484,7 @@ export const runWorkflow = async (data: RunWorkflowProps): Promise<DispatchFlowR
const dispatchData: ModuleDispatchProps<Record<string, any>> = {
...data,
mcpClientMemory,
usagePush: this.usagePush,
usagePush: this.usagePush.bind(this),
lastInteractive: data.lastInteractive?.entryNodeIds?.includes(node.nodeId)
? data.lastInteractive
: undefined,