mirror of
https://github.com/labring/FastGPT.git
synced 2025-12-26 04:32:50 +00:00
fix: usage
This commit is contained in:
parent
dd6b387d83
commit
7e8d104f53
|
|
@ -98,7 +98,10 @@ export const runAgentCall = async ({
|
|||
// 对请求的 requestMessages 进行压缩
|
||||
const taskDescription = currentStep.description || currentStep.title;
|
||||
if (taskDescription) {
|
||||
requestMessages = await compressRequestMessages(requestMessages, model, taskDescription);
|
||||
const result = await compressRequestMessages(requestMessages, model, taskDescription);
|
||||
requestMessages = result.messages;
|
||||
inputTokens += result.usage?.inputTokens || 0;
|
||||
outputTokens += result.usage?.outputTokens || 0;
|
||||
}
|
||||
|
||||
// Request LLM
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import { createLLMResponse } from '../request';
|
|||
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
|
||||
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
|
||||
import { getCompressRequestMessagesPrompt } from './prompt';
|
||||
import type { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
|
||||
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
|
||||
|
||||
/**
|
||||
* Compress a single oversized tool response
|
||||
|
|
@ -210,8 +212,14 @@ export const compressRequestMessages = async (
|
|||
messages: ChatCompletionMessageParam[],
|
||||
model: LLMModelItemType,
|
||||
currentDescription: string
|
||||
): Promise<ChatCompletionMessageParam[]> => {
|
||||
if (!messages || messages.length === 0) return messages;
|
||||
): Promise<{
|
||||
messages: ChatCompletionMessageParam[];
|
||||
usage?: ChatNodeUsageType;
|
||||
}> => {
|
||||
if (!messages || messages.length === 0)
|
||||
return {
|
||||
messages
|
||||
};
|
||||
|
||||
const tokenCount = await countGptMessagesTokens(messages);
|
||||
const thresholds = calculateCompressionThresholds(model.maxContext);
|
||||
|
|
@ -225,7 +233,9 @@ export const compressRequestMessages = async (
|
|||
|
||||
if (tokenCount <= maxTokenThreshold) {
|
||||
console.log('messages 无需压缩,共', messages.length, '条消息');
|
||||
return messages;
|
||||
return {
|
||||
messages
|
||||
};
|
||||
}
|
||||
|
||||
addLog.info('Start compressing agent messages', {
|
||||
|
|
@ -243,7 +253,7 @@ export const compressRequestMessages = async (
|
|||
const userPrompt = '请执行压缩操作,严格按照JSON格式返回结果。';
|
||||
|
||||
try {
|
||||
const { answerText } = await createLLMResponse({
|
||||
const { answerText, usage } = await createLLMResponse({
|
||||
body: {
|
||||
model,
|
||||
messages: [
|
||||
|
|
@ -263,22 +273,38 @@ export const compressRequestMessages = async (
|
|||
|
||||
if (!answerText) {
|
||||
addLog.warn('Compression failed: empty response, return original messages');
|
||||
return messages;
|
||||
return { messages };
|
||||
}
|
||||
|
||||
const { totalPoints, modelName } = formatModelChars2Points({
|
||||
model: model.model,
|
||||
inputTokens: usage.inputTokens,
|
||||
outputTokens: usage.outputTokens
|
||||
});
|
||||
const compressedUsage = {
|
||||
moduleName: 'Agent 对话历史压缩',
|
||||
model: modelName,
|
||||
totalPoints,
|
||||
inputTokens: usage.inputTokens,
|
||||
outputTokens: usage.outputTokens
|
||||
};
|
||||
|
||||
const jsonMatch =
|
||||
answerText.match(/```json\s*([\s\S]*?)\s*```/) || answerText.match(/\{[\s\S]*\}/);
|
||||
if (!jsonMatch) {
|
||||
addLog.warn('Compression failed: cannot parse JSON, return original messages');
|
||||
return messages;
|
||||
return { messages, usage: compressedUsage };
|
||||
}
|
||||
|
||||
const jsonText = jsonMatch[1] || jsonMatch[0];
|
||||
const parsed = JSON.parse(jsonText);
|
||||
const parsed = JSON.parse(jsonText) as {
|
||||
compressed_messages: ChatCompletionMessageParam[];
|
||||
compression_summary: string;
|
||||
};
|
||||
|
||||
if (!parsed.compressed_messages || !Array.isArray(parsed.compressed_messages)) {
|
||||
addLog.warn('Compression failed: invalid format, return original messages');
|
||||
return messages;
|
||||
return { messages, usage: compressedUsage };
|
||||
}
|
||||
|
||||
const compressedTokens = await countGptMessagesTokens(parsed.compressed_messages);
|
||||
|
|
@ -289,9 +315,12 @@ export const compressRequestMessages = async (
|
|||
summary: parsed.compression_summary
|
||||
});
|
||||
|
||||
return parsed.compressed_messages as ChatCompletionMessageParam[];
|
||||
return {
|
||||
messages: parsed.compressed_messages,
|
||||
usage: compressedUsage
|
||||
};
|
||||
} catch (error) {
|
||||
addLog.error('Compression failed', error);
|
||||
return messages;
|
||||
return { messages };
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -142,16 +142,16 @@ export const dispatchRunAgent = async (props: DispatchAgentModuleProps): Promise
|
|||
|
||||
/* ===== Check task complexity ===== */
|
||||
const taskIsComplexity = await (async () => {
|
||||
if (isCheckTaskComplexityStep) {
|
||||
const res = await checkTaskComplexity({
|
||||
model,
|
||||
userChatInput
|
||||
});
|
||||
if (res.usage) {
|
||||
usagePush([res.usage]);
|
||||
}
|
||||
return res.complex;
|
||||
}
|
||||
// if (isCheckTaskComplexityStep) {
|
||||
// const res = await checkTaskComplexity({
|
||||
// model,
|
||||
// userChatInput
|
||||
// });
|
||||
// if (res.usage) {
|
||||
// usagePush([res.usage]);
|
||||
// }
|
||||
// return res.complex;
|
||||
// }
|
||||
|
||||
// 对轮运行时候,代表都是进入复杂流程
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -62,15 +62,17 @@ export const stepCall = async ({
|
|||
} = props;
|
||||
|
||||
// Get depends on step ids
|
||||
const { depends, usage: dependsUsage } = await getStepDependon({
|
||||
model,
|
||||
steps,
|
||||
step
|
||||
});
|
||||
if (dependsUsage) {
|
||||
usagePush([dependsUsage]);
|
||||
if (!step.depends_on) {
|
||||
const { depends, usage: dependsUsage } = await getStepDependon({
|
||||
model,
|
||||
steps,
|
||||
step
|
||||
});
|
||||
if (dependsUsage) {
|
||||
usagePush([dependsUsage]);
|
||||
}
|
||||
step.depends_on = depends;
|
||||
}
|
||||
step.depends_on = depends;
|
||||
|
||||
// addLog.debug(`Step information`, steps);
|
||||
const systemPromptContent = await getMasterAgentSystemPrompt({
|
||||
|
|
|
|||
|
|
@ -230,17 +230,6 @@ export const dispatchReplanAgent = async ({
|
|||
}): Promise<DispatchPlanAgentResponse> => {
|
||||
const modelData = getLLMModel(model);
|
||||
|
||||
// 获取依赖的步骤
|
||||
const { depends, usage: dependsUsage } = await getStepDependon({
|
||||
model,
|
||||
steps: plan.steps,
|
||||
step: {
|
||||
id: '',
|
||||
title: '重新规划决策依据:需要依赖哪些步骤的判断',
|
||||
description: '本步骤分析先前的执行结果,以确定重新规划时需要依赖哪些特定步骤。'
|
||||
}
|
||||
});
|
||||
const replanSteps = plan.steps.filter((step) => depends.includes(step.id));
|
||||
const requestMessages: ChatCompletionMessageParam[] = [
|
||||
{
|
||||
role: 'system',
|
||||
|
|
@ -265,11 +254,25 @@ export const dispatchReplanAgent = async ({
|
|||
tool_call_id: lastMessages.tool_calls[0].id,
|
||||
content: userInput
|
||||
});
|
||||
// TODO: 确认这里是否有问题
|
||||
requestMessages.push({
|
||||
role: 'assistant',
|
||||
content: '请基于以上收集的用户信息,对 PLAN 进行重新规划,并严格按照 JSON Schema 输出。'
|
||||
});
|
||||
} else {
|
||||
// 获取依赖的步骤
|
||||
const { depends, usage: dependsUsage } = await getStepDependon({
|
||||
model,
|
||||
steps: plan.steps,
|
||||
step: {
|
||||
id: '',
|
||||
title: '重新规划决策依据:需要依赖哪些步骤的判断',
|
||||
description: '本步骤分析先前的执行结果,以确定重新规划时需要依赖哪些特定步骤。'
|
||||
}
|
||||
});
|
||||
// TODO: 推送
|
||||
const replanSteps = plan.steps.filter((step) => depends.includes(step.id));
|
||||
|
||||
requestMessages.push({
|
||||
role: 'user',
|
||||
// 根据需要 replanSteps 生成用户输入
|
||||
|
|
@ -306,15 +309,13 @@ export const dispatchReplanAgent = async ({
|
|||
if (!answerText && !toolCalls.length) {
|
||||
return Promise.reject(getEmptyResponseTip());
|
||||
}
|
||||
console.log(JSON.stringify({ answerText, toolCalls }, null, 2), 'Replan response');
|
||||
|
||||
/*
|
||||
正常输出情况:
|
||||
1. text: 正常生成plan
|
||||
2. toolCall: 调用ask工具
|
||||
3. text + toolCall: 可能生成 plan + 调用ask工具
|
||||
*/
|
||||
|
||||
// 获取生成的 plan
|
||||
const rePlan = (() => {
|
||||
if (!answerText) {
|
||||
return;
|
||||
|
|
@ -345,6 +346,7 @@ export const dispatchReplanAgent = async ({
|
|||
}
|
||||
};
|
||||
} else {
|
||||
console.log(JSON.stringify({ answerText, toolCalls }, null, 2), 'Replan response');
|
||||
return {
|
||||
type: 'agentPlanAskQuery',
|
||||
params: {
|
||||
|
|
@ -353,6 +355,9 @@ export const dispatchReplanAgent = async ({
|
|||
};
|
||||
}
|
||||
}
|
||||
|
||||
// RePlan 没有主动交互,则强制触发 check
|
||||
return PlanCheckInteractive;
|
||||
})();
|
||||
|
||||
const { totalPoints, modelName } = formatModelChars2Points({
|
||||
|
|
|
|||
|
|
@ -484,7 +484,7 @@ export const runWorkflow = async (data: RunWorkflowProps): Promise<DispatchFlowR
|
|||
const dispatchData: ModuleDispatchProps<Record<string, any>> = {
|
||||
...data,
|
||||
mcpClientMemory,
|
||||
usagePush: this.usagePush,
|
||||
usagePush: this.usagePush.bind(this),
|
||||
lastInteractive: data.lastInteractive?.entryNodeIds?.includes(node.nodeId)
|
||||
? data.lastInteractive
|
||||
: undefined,
|
||||
|
|
|
|||
Loading…
Reference in New Issue