From 31c17999b8a2c3c5d17f1719554d98560ade2396 Mon Sep 17 00:00:00 2001 From: chanzhi82020 Date: Mon, 25 Aug 2025 16:57:37 +0800 Subject: [PATCH] This PR introduces evaluation support designed specifically to track and benchmark applications built on the FastGPT platform. (#5476) - Adds a lightweight evaluation framework for app-level tracking and benchmarking. - Changes: 28 files, +1455 additions, -66 deletions. - Branch: add-evaluations -> main. - PR: https://github.com/chanzhi82020/FastGPT/pull/1 Applications built on FastGPT need repeatable, comparable benchmarks to measure regressions, track improvements, and validate releases. This initial implementation provides the primitives to define evaluation scenarios, run them against app endpoints or model components, and persist results for later analysis. I updated the PR description to emphasize that the evaluation system is targeted at FastGPT-built apps and expanded the explanation of the core pieces so reviewers understand the scope and intended use. The new description outlines the feature intent, core components, and how results are captured and aggregated for benchmarking. - Evaluation definitions - Define evaluation tasks that reference an app (app id, version, endpoint), test datasets or input cases, expected outputs (when applicable), and run configuration (parallelism, timeouts). - Support for custom metric plugins so teams can add domain-specific measures. - Runner / Executor - Executes evaluation cases against app endpoints or internal model interfaces. - Captures raw responses, response times, status codes, and any runtime errors. - Computes per-case metrics (e.g., correctness, latency) immediately after each case run. - Metrics & Aggregation - Built-in metrics: accuracy/success rate, latency (p50/p90/p99), throughput, error rate. - Aggregation produces per-run summaries and per-app historical summaries for trend analysis. - Allows combining metrics into composite scores for high-level benchmarking. 
- Persistence & Logging - Stores run results, input/output pairs (when needed), timestamps, environment info, and app/version metadata so runs are reproducible and auditable. - Logs are retained to facilitate debugging and root-cause analysis of regressions. - Reporting & Comparison - Produces aggregated reports suitable for CI gating, release notes, or dashboards. - Supports comparing multiple app versions or deployments side-by-side. - Extensibility & Integration - Designed to plug into CI (automated runs on PRs or releases), dashboards, and downstream analysis tools. - Easy to add new metrics, evaluators, or dataset connectors. By centering the evaluation system on FastGPT apps, teams can benchmark full application behavior (not only raw model outputs), correlate metrics with deployment configurations, and make informed release decisions. - Expand built-in metric suite (e.g., F1, BLEU/ROUGE where applicable), add dataset connectors, and provide example evaluation scenarios for sample apps. - Integrate with CI pipelines and add basic dashboarding for trend visualization. 
Related Issue: N/A Co-authored-by: Archer <545436317@qq.com> --- .../global/core/{app => }/evaluation/api.d.ts | 0 .../core/{app => }/evaluation/constants.ts | 2 +- .../core/{app => }/evaluation/type.d.ts | 0 .../global/core/{app => }/evaluation/utils.ts | 2 +- packages/service/core/app/controller.ts | 4 +- .../{app => }/evaluation/evalItemSchema.ts | 6 +- .../core/{app => }/evaluation/evalSchema.ts | 8 +- packages/service/core/evaluation/index.ts | 370 ++++++++++++++++++ .../service/core/{app => }/evaluation/mq.ts | 4 +- packages/service/core/evaluation/scoring.ts | 129 ++++++ .../core/{app => }/evaluation/utils.ts | 80 +++- .../support/permission/evaluation/auth.ts | 4 +- packages/service/type/env.d.ts | 4 + projects/app/.env.template | 4 + projects/app/src/instrumentation.ts | 3 + .../account/model/AddModelBox.tsx | 18 +- .../pageComponents/dashboard/Container.tsx | 17 +- .../{app => }/evaluation/DetailModal.tsx | 10 +- .../src/pages/api/core/evaluation/create.ts | 159 ++++++++ .../src/pages/api/core/evaluation/delete.ts | 53 +++ .../pages/api/core/evaluation/deleteItem.ts | 23 ++ .../pages/api/core/evaluation/exportItems.ts | 112 ++++++ .../app/src/pages/api/core/evaluation/list.ts | 194 +++++++++ .../pages/api/core/evaluation/listItems.ts | 83 ++++ .../pages/api/core/evaluation/retryItem.ts | 45 +++ .../pages/api/core/evaluation/updateItem.ts | 46 +++ .../src/pages/dashboard/evaluation/create.tsx | 6 +- .../src/pages/dashboard/evaluation/index.tsx | 6 +- .../{app/api => evaluation}/evaluation.ts | 19 +- 29 files changed, 1341 insertions(+), 70 deletions(-) rename packages/global/core/{app => }/evaluation/api.d.ts (100%) rename packages/global/core/{app => }/evaluation/constants.ts (91%) rename packages/global/core/{app => }/evaluation/type.d.ts (100%) rename packages/global/core/{app => }/evaluation/utils.ts (85%) rename packages/service/core/{app => }/evaluation/evalItemSchema.ts (82%) rename packages/service/core/{app => }/evaluation/evalSchema.ts (83%) 
create mode 100644 packages/service/core/evaluation/index.ts rename packages/service/core/{app => }/evaluation/mq.ts (94%) create mode 100644 packages/service/core/evaluation/scoring.ts rename packages/service/core/{app => }/evaluation/utils.ts (60%) rename projects/app/src/pageComponents/{app => }/evaluation/DetailModal.tsx (98%) create mode 100644 projects/app/src/pages/api/core/evaluation/create.ts create mode 100644 projects/app/src/pages/api/core/evaluation/delete.ts create mode 100644 projects/app/src/pages/api/core/evaluation/deleteItem.ts create mode 100644 projects/app/src/pages/api/core/evaluation/exportItems.ts create mode 100644 projects/app/src/pages/api/core/evaluation/list.ts create mode 100644 projects/app/src/pages/api/core/evaluation/listItems.ts create mode 100644 projects/app/src/pages/api/core/evaluation/retryItem.ts create mode 100644 projects/app/src/pages/api/core/evaluation/updateItem.ts rename projects/app/src/web/core/{app/api => evaluation}/evaluation.ts (67%) diff --git a/packages/global/core/app/evaluation/api.d.ts b/packages/global/core/evaluation/api.d.ts similarity index 100% rename from packages/global/core/app/evaluation/api.d.ts rename to packages/global/core/evaluation/api.d.ts diff --git a/packages/global/core/app/evaluation/constants.ts b/packages/global/core/evaluation/constants.ts similarity index 91% rename from packages/global/core/app/evaluation/constants.ts rename to packages/global/core/evaluation/constants.ts index d6b029858..562426771 100644 --- a/packages/global/core/app/evaluation/constants.ts +++ b/packages/global/core/evaluation/constants.ts @@ -1,4 +1,4 @@ -import { i18nT } from '../../../../web/i18n/utils'; +import { i18nT } from '../../../web/i18n/utils'; export const evaluationFileErrors = i18nT('dashboard_evaluation:eval_file_check_error'); diff --git a/packages/global/core/app/evaluation/type.d.ts b/packages/global/core/evaluation/type.d.ts similarity index 100% rename from 
packages/global/core/app/evaluation/type.d.ts rename to packages/global/core/evaluation/type.d.ts diff --git a/packages/global/core/app/evaluation/utils.ts b/packages/global/core/evaluation/utils.ts similarity index 85% rename from packages/global/core/app/evaluation/utils.ts rename to packages/global/core/evaluation/utils.ts index adad61c67..4e29fcfb9 100644 --- a/packages/global/core/app/evaluation/utils.ts +++ b/packages/global/core/evaluation/utils.ts @@ -1,4 +1,4 @@ -import type { VariableItemType } from '../type'; +import type { VariableItemType } from '../app/type'; export const getEvaluationFileHeader = (appVariables?: VariableItemType[]) => { if (!appVariables || appVariables.length === 0) return '*q,*a,history'; diff --git a/packages/service/core/app/controller.ts b/packages/service/core/app/controller.ts index 5cf05f1fb..3970468de 100644 --- a/packages/service/core/app/controller.ts +++ b/packages/service/core/app/controller.ts @@ -7,8 +7,8 @@ import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node' import { encryptSecretValue, storeSecretValue } from '../../common/secret/utils'; import { SystemToolInputTypeEnum } from '@fastgpt/global/core/app/systemTool/constants'; import { type ClientSession } from '../../common/mongo'; -import { MongoEvaluation } from './evaluation/evalSchema'; -import { removeEvaluationJob } from './evaluation/mq'; +import { MongoEvaluation } from '../evaluation/evalSchema'; +import { removeEvaluationJob } from '../evaluation/mq'; import { deleteChatFiles } from '../chat/controller'; import { MongoChatItem } from '../chat/chatItemSchema'; import { MongoChat } from '../chat/chatSchema'; diff --git a/packages/service/core/app/evaluation/evalItemSchema.ts b/packages/service/core/evaluation/evalItemSchema.ts similarity index 82% rename from packages/service/core/app/evaluation/evalItemSchema.ts rename to packages/service/core/evaluation/evalItemSchema.ts index 45e8633da..732b59f68 100644 --- 
a/packages/service/core/app/evaluation/evalItemSchema.ts +++ b/packages/service/core/evaluation/evalItemSchema.ts @@ -1,10 +1,10 @@ -import { connectionMongo, getMongoModel } from '../../../common/mongo'; +import { connectionMongo, getMongoModel } from '../../common/mongo'; import { EvaluationCollectionName } from './evalSchema'; import { EvaluationStatusEnum, EvaluationStatusValues -} from '@fastgpt/global/core/app/evaluation/constants'; -import type { EvalItemSchemaType } from '@fastgpt/global/core/app/evaluation/type'; +} from '@fastgpt/global/core/evaluation/constants'; +import type { EvalItemSchemaType } from '@fastgpt/global/core/evaluation/type'; const { Schema } = connectionMongo; diff --git a/packages/service/core/app/evaluation/evalSchema.ts b/packages/service/core/evaluation/evalSchema.ts similarity index 83% rename from packages/service/core/app/evaluation/evalSchema.ts rename to packages/service/core/evaluation/evalSchema.ts index a8678ebda..458d89f4d 100644 --- a/packages/service/core/app/evaluation/evalSchema.ts +++ b/packages/service/core/evaluation/evalSchema.ts @@ -2,10 +2,10 @@ import { TeamCollectionName, TeamMemberCollectionName } from '@fastgpt/global/support/user/team/constant'; -import { connectionMongo, getMongoModel } from '../../../common/mongo'; -import { AppCollectionName } from '../schema'; -import type { EvaluationSchemaType } from '@fastgpt/global/core/app/evaluation/type'; -import { UsageCollectionName } from '../../../support/wallet/usage/schema'; +import { connectionMongo, getMongoModel } from '../../common/mongo'; +import { AppCollectionName } from '../app/schema'; +import type { EvaluationSchemaType } from '@fastgpt/global/core/evaluation/type'; +import { UsageCollectionName } from '../../support/wallet/usage/schema'; const { Schema } = connectionMongo; export const EvaluationCollectionName = 'eval'; diff --git a/packages/service/core/evaluation/index.ts b/packages/service/core/evaluation/index.ts new file mode 100644 index 
000000000..7f2156c94 --- /dev/null +++ b/packages/service/core/evaluation/index.ts @@ -0,0 +1,370 @@ +import { addLog } from '../../common/system/log'; +import type { Job } from '../../common/bullmq'; +import { getEvaluationWorker, type EvaluationJobData, removeEvaluationJob } from './mq'; +import { MongoEvalItem } from './evalItemSchema'; +import { Types } from 'mongoose'; +import { dispatchWorkFlow } from '../workflow/dispatch'; +import { MongoEvaluation } from './evalSchema'; +import { getNanoid } from '@fastgpt/global/common/string/tools'; +import { getAppLatestVersion } from '../../core/app/version/controller'; +import { + getWorkflowEntryNodeIds, + storeEdges2RuntimeEdges, + storeNodes2RuntimeNodes +} from '@fastgpt/global/core/workflow/runtime/utils'; +import type { UserChatItemValueItemType } from '@fastgpt/global/core/chat/type'; +import { ChatItemValueTypeEnum } from '@fastgpt/global/core/chat/constants'; +import { WORKFLOW_MAX_RUN_TIMES } from '../../core/workflow/constants'; +import { getAppEvaluationScore } from './scoring'; +import { checkTeamAIPoints } from '../../support/permission/teamLimit'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import type { + EvalItemSchemaType, + EvaluationSchemaType +} from '@fastgpt/global/core/evaluation/type'; +import type { Document } from 'mongoose'; +import { TeamErrEnum } from '@fastgpt/global/common/error/code/team'; +import { + InformLevelEnum, + SendInformTemplateCodeEnum +} from '@fastgpt/global/support/user/inform/constants'; +import type { AppChatConfigType, AppSchema } from '@fastgpt/global/core/app/type'; +import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node'; +import type { StoreEdgeItemType } from '@fastgpt/global/core/workflow/type/edge'; +import { getErrText } from '@fastgpt/global/common/error/utils'; +import { formatModelChars2Points } from '../../support/wallet/usage/utils'; +import { ModelTypeEnum } from 
'@fastgpt/global/core/ai/model'; +import { concatUsage } from '../../support/wallet/usage/controller'; +import { MongoApp } from '../../core/app/schema'; +import { delay } from '@fastgpt/global/common/system/utils'; +import { removeDatasetCiteText } from '../../core/ai/utils'; +import { getUserChatInfoAndAuthTeamPoints } from '../../support/permission/auth/team'; +import { getRunningUserInfoByTmbId } from '../../support/user/team/utils'; + +type AppContextType = { + appData: AppSchema; + timezone: string; + externalProvider: Record; + nodes: StoreNodeItemType[]; + edges: StoreEdgeItemType[]; + chatConfig: AppChatConfigType; +}; + +export const initEvaluationWorker = () => { + addLog.info('Init Evaluation Worker...'); + return getEvaluationWorker(processor); +}; + +const dealAiPointCheckError = async (evalId: string, error: any) => { + if (error === TeamErrEnum.aiPointsNotEnough) { + await MongoEvaluation.updateOne( + { _id: new Types.ObjectId(evalId) }, + { $set: { errorMessage: error } } + ); + + const evaluation = await MongoEvaluation.findById(evalId).lean(); + if (evaluation) { + sendInform2OneUser({ + level: InformLevelEnum.important, + templateCode: 'LACK_OF_POINTS', + templateParam: {}, + teamId: evaluation.teamId + }); + } + return; + } + + return Promise.reject(error); +}; + +const finishEvaluation = async (evalId: string) => { + // Computed all eval score and add to evaluation collection + const scoreResult = await MongoEvalItem.aggregate([ + { + $match: { + evalId: new Types.ObjectId(evalId), + status: EvaluationStatusEnum.completed, + errorMessage: { $exists: false }, + score: { $exists: true } + } + }, + { + $group: { + _id: null, + avgScore: { $avg: '$score' } + } + } + ]); + + const avgScore = scoreResult.length > 0 ? 
scoreResult[0].avgScore : 0; + + await MongoEvaluation.updateOne( + { _id: new Types.ObjectId(evalId) }, + { + $set: { + finishTime: new Date(), + score: avgScore + } + } + ); + + addLog.info('[Evaluation] Task finished', { evalId, avgScore }); +}; + +const handleEvalItemError = async ( + evalItem: Document & EvalItemSchemaType, + error: any +) => { + const errorMessage = getErrText(error); + + await MongoEvalItem.updateOne( + { _id: evalItem._id }, + { + $inc: { retry: -1 }, + $set: { + errorMessage + } + } + ); +}; + +const createMergedEvaluationUsage = async ( + params: { + evaluation: EvaluationSchemaType; + totalPoints: number; + } & ( + | { + type: 'run'; + } + | { + type: 'eval'; + inputTokens: number; + outputTokens: number; + } + ) +) => { + const { evaluation, totalPoints } = params; + + if (params.type === 'run') { + await concatUsage({ + billId: evaluation.usageId, + teamId: evaluation.teamId, + tmbId: evaluation.tmbId, + totalPoints, + count: 1, + listIndex: 0 + }); + } else if (params.type === 'eval') { + await concatUsage({ + billId: evaluation.usageId, + teamId: evaluation.teamId, + tmbId: evaluation.tmbId, + totalPoints, + inputTokens: params.inputTokens, + outputTokens: params.outputTokens, + listIndex: 1 + }); + } +}; + +const processEvalItem = async ({ + evalItem, + evaluation, + appContext +}: { + evalItem: Document & EvalItemSchemaType; + evaluation: EvaluationSchemaType; + appContext: AppContextType; +}) => { + const getAppAnswer = async (): Promise => { + if (evalItem?.response) { + return evalItem.response; + } + + const { appData, timezone, externalProvider, nodes, edges, chatConfig } = appContext; + const chatId = getNanoid(); + + const query: UserChatItemValueItemType[] = [ + { + type: ChatItemValueTypeEnum.text, + text: { + content: evalItem?.question || '' + } + } + ]; + + const histories = (() => { + try { + return evalItem?.history ? 
JSON.parse(evalItem.history) : []; + } catch (error) { + return []; + } + })(); + + const { assistantResponses, flowUsages } = await dispatchWorkFlow({ + chatId, + timezone, + externalProvider, + mode: 'chat', + runningAppInfo: { + id: String(appData._id), + teamId: String(appData.teamId), + tmbId: String(appData.tmbId) + }, + runningUserInfo: await getRunningUserInfoByTmbId(evaluation.tmbId), + uid: String(evaluation.tmbId), + runtimeNodes: storeNodes2RuntimeNodes(nodes, getWorkflowEntryNodeIds(nodes)), + runtimeEdges: storeEdges2RuntimeEdges(edges), + variables: evalItem?.globalVariables || {}, + query, + chatConfig, + histories, + stream: false, + maxRunTimes: WORKFLOW_MAX_RUN_TIMES + }); + const totalPoints = flowUsages.reduce((sum, item) => sum + (item.totalPoints || 0), 0); + const appAnswer = removeDatasetCiteText(assistantResponses[0]?.text?.content || '', false); + + evalItem.response = appAnswer; + evalItem.responseTime = new Date(); + await evalItem.save(); + + // Push usage + createMergedEvaluationUsage({ + evaluation, + totalPoints, + type: 'run' + }); + + return appAnswer; + }; + + const appAnswer = await getAppAnswer(); + + // Eval score + const { accuracyScore, usage } = await getAppEvaluationScore({ + question: evalItem?.question || '', + appAnswer, + standardAnswer: evalItem?.expectedResponse || '', + model: evaluation.evalModel + }); + + evalItem.status = EvaluationStatusEnum.completed; + evalItem.accuracy = accuracyScore; + evalItem.score = accuracyScore; + evalItem.finishTime = new Date(); + await evalItem.save(); + + // Push usage + const { totalPoints: evalModelPoints } = formatModelChars2Points({ + model: evaluation.evalModel, + modelType: ModelTypeEnum.llm, + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens + }); + createMergedEvaluationUsage({ + evaluation, + totalPoints: evalModelPoints, + type: 'eval', + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens + }); +}; + +const processor = async (job: Job) => 
{ + const { evalId } = job.data; + + // 初始化检查 + const evaluation = await MongoEvaluation.findById(evalId); + if (!evaluation) { + addLog.warn('[Evaluation] Eval not found', { evalId }); + await removeEvaluationJob(evalId); + return; + } + + const appData = await MongoApp.findById(evaluation.appId); + if (!appData) { + addLog.warn('[Evaluation] App not found', { evalId }); + await removeEvaluationJob(evalId); + return; + } + + const [{ timezone, externalProvider }, { nodes, edges, chatConfig }] = await Promise.all([ + getUserChatInfoAndAuthTeamPoints(appData.tmbId), + getAppLatestVersion(appData._id, appData), + // Reset error message + MongoEvaluation.updateOne({ _id: new Types.ObjectId(evalId) }, { $set: { errorMessage: null } }) + ]); + + const appContext: AppContextType = { + appData, + timezone, + externalProvider, + nodes, + edges, + chatConfig + }; + + // 主循环 + while (true) { + try { + await checkTeamAIPoints(evaluation.teamId); + } catch (error) { + return await dealAiPointCheckError(evalId, error); + } + + const evalItem = await MongoEvalItem.findOneAndUpdate( + { + evalId, + status: { $in: [EvaluationStatusEnum.queuing, EvaluationStatusEnum.evaluating] }, + retry: { $gt: 0 } + }, + { + $set: { status: EvaluationStatusEnum.evaluating } + } + ); + if (!evalItem) { + await finishEvaluation(evalId); + break; + } + + // Process eval item + try { + await processEvalItem({ + evalItem, + evaluation, + appContext + }); + } catch (error) { + if (error === 'Evaluation model not found') { + addLog.warn('[Evaluation] Model not found', { evalId, model: evaluation.evalModel }); + + await MongoEvaluation.updateOne( + { _id: new Types.ObjectId(evalId) }, + { $set: { errorMessage: `Model ${evaluation.evalModel} not found` } } + ).catch(); + + break; + } + + await handleEvalItemError(evalItem, error); + await delay(100); + } + } +}; +function getMessageTemplate(templateCode: any): { + getInformTemplate: any; + lockMinutes: any; + isSendQueue: any; +} { + throw new 
Error('Function not implemented.'); +} + +function sendInform2OneUser(arg0: { + level: InformLevelEnum; + templateCode: string; + templateParam: {}; + teamId: string; +}) { + addLog.warn('sendInform2OneUser: Starting notification process:', arg0); +} diff --git a/packages/service/core/app/evaluation/mq.ts b/packages/service/core/evaluation/mq.ts similarity index 94% rename from packages/service/core/app/evaluation/mq.ts rename to packages/service/core/evaluation/mq.ts index c0192d476..8a5625b8a 100644 --- a/packages/service/core/app/evaluation/mq.ts +++ b/packages/service/core/evaluation/mq.ts @@ -1,6 +1,6 @@ -import { getQueue, getWorker, QueueNames } from '../../../common/bullmq'; +import { getQueue, getWorker, QueueNames } from '../../common/bullmq'; import { type Processor } from 'bullmq'; -import { addLog } from '../../../common/system/log'; +import { addLog } from '../../common/system/log'; export type EvaluationJobData = { evalId: string; diff --git a/packages/service/core/evaluation/scoring.ts b/packages/service/core/evaluation/scoring.ts new file mode 100644 index 000000000..00f4fd9ba --- /dev/null +++ b/packages/service/core/evaluation/scoring.ts @@ -0,0 +1,129 @@ +import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type'; +import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; +import { getLLMModel } from '../../core/ai/model'; +import { createChatCompletion } from '../../core/ai/config'; +import { formatLLMResponse, llmCompletionsBodyFormat } from '../../core/ai/utils'; +import { loadRequestMessages } from '../../core/chat/utils'; +import { countGptMessagesTokens, countPromptTokens } from '../../common/string/tiktoken'; + +const template_accuracy1 = ` +Instruction: You are a world class state of the art assistant for rating a User Answer given a Question. The Question is completely answered by the Reference Answer. 
+Say 4, if User Answer is full contained and equivalent to Reference Answer in all terms, topics, numbers, metrics, dates and units. +Say 2, if User Answer is partially contained and almost equivalent to Reference Answer in all terms, topics, numbers, metrics, dates and units. +Say 0, if User Answer is not contained in Reference Answer or not accurate in all terms, topics, numbers, metrics, dates and units or the User Answer do not answer the question. +Do not explain or justify your rating. Your rating must be only 4, 2 or 0 according to the instructions above. + +## Question +{query} + +## Answer0 +{sentence_inference} + +## Answer1 +{sentence_true} + +## Rating`; + +const template_accuracy2 = ` +I will rate the User Answer in comparison to the Reference Answer for a given Question. +A rating of 4 indicates that the User Answer is entirely consistent with the Reference Answer, covering all aspects, topics, numbers, metrics, dates, and units. +A rating of 2 signifies that the User Answer is mostly aligned with the Reference Answer, with minor discrepancies in some areas. +A rating of 0 means that the User Answer is either inaccurate, incomplete, or unrelated to the Reference Answer, or it fails to address the Question. +I will provide the rating without any explanation or justification, adhering to the following scale: 0 (no match), 2 (partial match), 4 (exact match). +Do not explain or justify my rating. My rating must be only 4, 2 or 0 only. 
+ +## Question +{query} + +## Answer0 +{sentence_inference} + +## Answer1 +{sentence_true} + +## Rating`; + +export const getAppEvaluationScore = async ({ + question, + appAnswer, + standardAnswer, + model +}: { + question: string; + appAnswer: string; + standardAnswer: string; + model: string; +}) => { + const modelData = getLLMModel(model); + if (!modelData) { + return Promise.reject('Evaluation model not found'); + } + + const getEvalResult = async (template: string) => { + const messages: ChatCompletionMessageParam[] = [ + { + role: ChatCompletionRequestMessageRoleEnum.System, + content: template + }, + { + role: ChatCompletionRequestMessageRoleEnum.User, + content: [ + { + type: 'text', + text: `## Question +${question} + +## Answer0 +${appAnswer} + +## Answer1 +${standardAnswer} + +## Rating` + } + ] + } + ]; + const { response } = await createChatCompletion({ + body: llmCompletionsBodyFormat( + { + model: modelData.model, + temperature: 0.3, + messages: await loadRequestMessages({ messages, useVision: true }), + stream: true, + max_tokens: 5 + }, + modelData + ) + }); + + const { text, usage } = await formatLLMResponse(response); + + const numberText = Number(text); + const rate = isNaN(numberText) ? 
0 : numberText / 4; + + return { + rate, + inputTokens: usage?.prompt_tokens || (await countGptMessagesTokens(messages)), + outputTokens: usage?.completion_tokens || (await countPromptTokens(text)) + }; + }; + + const results = await Promise.all([ + getEvalResult(template_accuracy1), + getEvalResult(template_accuracy2) + ]); + + const accuracyScore = + Math.round((results.reduce((acc, item) => acc + item.rate, 0) / results.length) * 100) / 100; + const inputTokens = results.reduce((acc, item) => acc + item.inputTokens, 0); + const outputTokens = results.reduce((acc, item) => acc + item.outputTokens, 0); + + return { + accuracyScore, + usage: { + inputTokens, + outputTokens + } + }; +}; diff --git a/packages/service/core/app/evaluation/utils.ts b/packages/service/core/evaluation/utils.ts similarity index 60% rename from packages/service/core/app/evaluation/utils.ts rename to packages/service/core/evaluation/utils.ts index 57dc28008..25dab9f39 100644 --- a/packages/service/core/app/evaluation/utils.ts +++ b/packages/service/core/evaluation/utils.ts @@ -1,8 +1,15 @@ -import { evaluationFileErrors } from '@fastgpt/global/core/app/evaluation/constants'; -import { getEvaluationFileHeader } from '@fastgpt/global/core/app/evaluation/utils'; -import type { VariableItemType } from '@fastgpt/global/core/app/type'; -import { addLog } from '../../../common/system/log'; import { VariableInputEnum } from '@fastgpt/global/core/workflow/constants'; +import { evaluationFileErrors } from '@fastgpt/global/core/evaluation/constants'; +import { getEvaluationFileHeader } from '@fastgpt/global/core/evaluation/utils'; +import type { VariableItemType } from '@fastgpt/global/core/app/type'; +// import { addLog } from '@fastgpt/service/common/system/log'; +import { TeamErrEnum } from '@fastgpt/global/common/error/code/team'; +import { Types } from 'mongoose'; +import { retryFn } from '@fastgpt/global/common/system/utils'; +import { i18nT } from '../../../web/i18n/utils'; +import { addLog } 
from '../../common/system/log'; +import { MongoEvaluation } from './evalSchema'; +import { addEvaluationJob } from './mq'; import Papa from 'papaparse'; export const parseEvaluationCSV = (rawText: string) => { @@ -24,15 +31,27 @@ export const validateEvaluationFile = async ( rawText: string, appVariables?: VariableItemType[] ) => { - // Parse CSV using Papa Parse - const csvData = parseEvaluationCSV(rawText); - const dataLength = csvData.length; + // NOTE(review): rows are split on raw line breaks (and later on commas), which ignores CSV quoting; + // quoted fields containing commas or newlines will be mis-split - consider parseEvaluationCSV. + + // Split on every newline style (\r\n, \n, \r) and drop blank lines so they are neither validated nor counted as rows + const lines = rawText.trim().split(/\r?\n|\r/).filter((line) => line.trim() !== ''); + const dataLength = lines.length; + + // Reject files that contain no non-blank line at all + const nonEmptyLines = lines; + if (nonEmptyLines.length === 0) { + addLog.error('File is empty'); + return Promise.reject(evaluationFileErrors); + } // Validate file header const expectedHeader = getEvaluationFileHeader(appVariables); - const actualHeader = csvData[0]?.join(',') || ''; + // Strip surrounding whitespace from the header (e.g. a BOM or spaces) + const actualHeader = nonEmptyLines[0].trim(); + if (actualHeader !== expectedHeader) { - addLog.error(`Header mismatch. Expected: ${expectedHeader}, Got: ${actualHeader}`); + addLog.error(`Header mismatch. 
Expected: "${expectedHeader}", Got: "${actualHeader}"`); return Promise.reject(evaluationFileErrors); } @@ -48,7 +67,7 @@ export const validateEvaluationFile = async ( return Promise.reject(evaluationFileErrors); } - const headers = csvData[0]; + const headers = lines[0].split(','); // Get required field indices const requiredFields = headers @@ -58,8 +77,8 @@ export const validateEvaluationFile = async ( const errors: string[] = []; // Validate each data row - for (let i = 1; i < csvData.length; i++) { - const values = csvData[i]; + for (let i = 1; i < lines.length; i++) { + const values = lines[i].trim().split(','); // Check required fields requiredFields.forEach(({ header, index }) => { @@ -84,7 +103,7 @@ export const validateEvaluationFile = async ( return Promise.reject(evaluationFileErrors); } - return { csvData, dataLength }; + return { lines, dataLength }; }; const validateRowVariables = ({ @@ -145,3 +164,38 @@ const validateRowVariables = ({ } }); }; + +export const checkTeamHasRunningEvaluation = async (teamId: string) => { + const runningEvaluation = await MongoEvaluation.findOne( + { + teamId: new Types.ObjectId(teamId), + finishTime: { $exists: false } + }, + '_id' + ).lean(); + + if (runningEvaluation) { + return Promise.reject(i18nT('dashboard_evaluation:team_has_running_evaluation')); + } +}; + +export const resumePausedEvaluations = async (teamId: string): Promise => { + return retryFn(async () => { + const pausedEvaluations = await MongoEvaluation.find({ + teamId: new Types.ObjectId(teamId), + errorMessage: TeamErrEnum.aiPointsNotEnough, + finishTime: { $exists: false } + }).lean(); + + if (pausedEvaluations.length === 0) { + return; + } + + for (const evaluation of pausedEvaluations) { + await MongoEvaluation.updateOne({ _id: evaluation._id }, { $unset: { errorMessage: 1 } }); + await addEvaluationJob({ evalId: String(evaluation._id) }); + } + + addLog.info('Resumed paused evaluations', { teamId, count: pausedEvaluations.length }); + }, 3); +}; 
diff --git a/packages/service/support/permission/evaluation/auth.ts b/packages/service/support/permission/evaluation/auth.ts index c5d4266a3..2d8bbee91 100644 --- a/packages/service/support/permission/evaluation/auth.ts +++ b/packages/service/support/permission/evaluation/auth.ts @@ -3,10 +3,10 @@ import { ManagePermissionVal, ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; -import type { EvaluationSchemaType } from '@fastgpt/global/core/app/evaluation/type'; +import type { EvaluationSchemaType } from '@fastgpt/global/core/evaluation/type'; import type { AuthModeType } from '../type'; -import { MongoEvaluation } from '../../../core/app/evaluation/evalSchema'; import { parseHeaderCert } from '../auth/common'; +import { MongoEvaluation } from '../../../core/evaluation/evalSchema'; export const authEval = async ({ evalId, diff --git a/packages/service/type/env.d.ts b/packages/service/type/env.d.ts index aff70d4ee..2471b807c 100644 --- a/packages/service/type/env.d.ts +++ b/packages/service/type/env.d.ts @@ -46,6 +46,10 @@ declare global { CHAT_LOG_SOURCE_ID_PREFIX?: string; NEXT_PUBLIC_BASE_URL: string; + + // evaluations settings + EVAL_CONCURRENCY?: string; + EVAL_LINE_LIMIT?: string; } } } diff --git a/projects/app/.env.template b/projects/app/.env.template index 5bc671fb2..be2e1cd55 100644 --- a/projects/app/.env.template +++ b/projects/app/.env.template @@ -101,3 +101,7 @@ CONFIG_JSON_PATH= # CHAT_LOG_SOURCE_ID_PREFIX=fastgpt- +# evaluations settings +EVAL_CONCURRENCY=3 # the number of concurrent evaluations tasks +EVAL_LINE_LIMIT=1000 # the max line number of the uploaded eval data file + diff --git a/projects/app/src/instrumentation.ts b/projects/app/src/instrumentation.ts index 0a69f20c9..267898cbe 100644 --- a/projects/app/src/instrumentation.ts +++ b/projects/app/src/instrumentation.ts @@ -15,6 +15,7 @@ export async function register() { { initVectorStore }, { initRootUser }, { startMongoWatch }, + { initEvaluationWorker }, { 
startCron }, { startTrainingQueue }, { preLoadWorker }, @@ -29,6 +30,7 @@ export async function register() { import('@fastgpt/service/common/vectorDB/controller'), import('@/service/mongo'), import('@/service/common/system/volumnMongoWatch'), + import('@fastgpt/service/core/evaluation'), import('@/service/common/system/cron'), import('@/service/core/dataset/training/utils'), import('@fastgpt/service/worker/preload'), @@ -59,6 +61,7 @@ export async function register() { ]); startMongoWatch(); + initEvaluationWorker(); startCron(); startTrainingQueue(true); diff --git a/projects/app/src/pageComponents/account/model/AddModelBox.tsx b/projects/app/src/pageComponents/account/model/AddModelBox.tsx index 96eedadf2..34430dd11 100644 --- a/projects/app/src/pageComponents/account/model/AddModelBox.tsx +++ b/projects/app/src/pageComponents/account/model/AddModelBox.tsx @@ -677,16 +677,14 @@ export const ModelEditModal = ({ - {feConfigs?.isPlus && ( - - {t('account_model:use_in_eval')} - - - - - - - )} + + {t('account_model:use_in_eval')} + + + + + + diff --git a/projects/app/src/pageComponents/dashboard/Container.tsx b/projects/app/src/pageComponents/dashboard/Container.tsx index b0b198075..45b4c7101 100644 --- a/projects/app/src/pageComponents/dashboard/Container.tsx +++ b/projects/app/src/pageComponents/dashboard/Container.tsx @@ -191,21 +191,16 @@ const DashboardContainer = ({ groupName: t('common:mcp_server'), children: [] }, - ...(feConfigs?.isPlus - ? 
[ - { - groupId: TabEnum.evaluation, - groupAvatar: 'kbTest', - groupName: t('common:app_evaluation'), - children: [] - } - ] - : []) + { + groupId: TabEnum.evaluation, + groupAvatar: 'kbTest', + groupName: t('common:app_evaluation'), + children: [] + } ]; }, [ currentType, feConfigs.appTemplateCourse, - feConfigs?.isPlus, i18n.language, pluginGroups, t, diff --git a/projects/app/src/pageComponents/app/evaluation/DetailModal.tsx b/projects/app/src/pageComponents/evaluation/DetailModal.tsx similarity index 98% rename from projects/app/src/pageComponents/app/evaluation/DetailModal.tsx rename to projects/app/src/pageComponents/evaluation/DetailModal.tsx index 8c957c426..904370eab 100644 --- a/projects/app/src/pageComponents/app/evaluation/DetailModal.tsx +++ b/projects/app/src/pageComponents/evaluation/DetailModal.tsx @@ -23,7 +23,7 @@ import { getEvalItemsList, retryEvalItem, updateEvalItem -} from '@/web/core/app/api/evaluation'; +} from '@/web/core/evaluation/evaluation'; import { usePagination } from '@fastgpt/web/hooks/usePagination'; import { downloadFetch, getWebLLMModel } from '@/web/common/system/utils'; import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConfirm'; @@ -33,9 +33,9 @@ import { useForm } from 'react-hook-form'; import { EvaluationStatusMap, EvaluationStatusEnum -} from '@fastgpt/global/core/app/evaluation/constants'; -import type { evaluationType, listEvalItemsItem } from '@fastgpt/global/core/app/evaluation/type'; -import type { updateEvalItemBody } from '@fastgpt/global/core/app/evaluation/api'; +} from '@fastgpt/global/core/evaluation/constants'; +import type { evaluationType, listEvalItemsItem } from '@fastgpt/global/core/evaluation/type'; +import type { updateEvalItemBody } from '@fastgpt/global/core/evaluation/api'; import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; const formatEvaluationStatus = (item: { status: number; errorMessage?: string }, t: TFunction) => { @@ -129,7 +129,7 @@ const 
EvaluationDetailModal = ({ const { runAsync: exportEval, loading: isDownloading } = useRequest2(async () => { await downloadFetch({ - url: `/api/proApi/core/app/evaluation/exportItems?evalId=${evalDetail._id}`, + url: `/api/core/evaluation/exportItems?evalId=${evalDetail._id}`, filename: `${evalDetail.name}.csv`, body: { title: t('dashboard_evaluation:evaluation_export_title'), diff --git a/projects/app/src/pages/api/core/evaluation/create.ts b/projects/app/src/pages/api/core/evaluation/create.ts new file mode 100644 index 000000000..97388d6ea --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/create.ts @@ -0,0 +1,159 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { addLog } from '@fastgpt/service/common/system/log'; +import { removeFilesByPaths } from '@fastgpt/service/common/file/utils'; +import { getUploadModel } from '@fastgpt/service/common/file/multer'; +import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; +import { authApp } from '@fastgpt/service/support/permission/app/auth'; +import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils'; +import { createEvaluationUsage } from '@fastgpt/service/support/wallet/usage/controller'; +import { MongoEvaluation } from '@fastgpt/service/core/evaluation/evalSchema'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { addEvaluationJob } from '@fastgpt/service/core/evaluation/mq'; +import { addAuditLog, getI18nAppType } from '@fastgpt/service/support/user/audit/util'; +import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; +import { validateEvaluationFile } from '@fastgpt/service/core/evaluation/utils'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun'; +import { checkTeamAIPoints } from 
'@fastgpt/service/support/permission/teamLimit'; +import { checkTeamHasRunningEvaluation } from '@fastgpt/service/core/evaluation/utils'; + +export type createEvaluationBody = { + name: string; + appId: string; + evalModel: string; +}; +// Max data rows per upload; falls back to 1000 when EVAL_LINE_LIMIT is unset, empty, zero or not numeric +const MAX_EVAL_ITEMS = Number(process.env.EVAL_LINE_LIMIT) || 1000; + +async function handler(req: ApiRequestProps, res: ApiResponseType) { + const filePaths: string[] = []; + + try { + const upload = getUploadModel({ + maxSize: global.feConfigs?.uploadFileMaxSize + }); + + const { file, data } = await upload.getUploadFile(req, res); + filePaths.push(file.path); + // `return await` so the rejection is raised inside this try block and the catch below removes the uploaded file + if (file.mimetype !== 'text/csv') { + return await Promise.reject('File must be a CSV file'); + } + + const { teamId, tmbId, app } = await authApp({ + req, + authToken: true, + authApiKey: true, + per: ReadPermissionVal, + appId: data.appId + }); + await checkTeamAIPoints(teamId); + await checkTeamHasRunningEvaluation(teamId); + + const { rawText } = await readRawTextByLocalFile({ + teamId, + tmbId, + path: file.path, + encoding: file.encoding, + getFormatText: false + }); + removeFilesByPaths(filePaths); + + const appVariables = app.chatConfig.variables; + + const { lines } = await validateEvaluationFile(rawText, appVariables); + + if (lines.length - 1 > MAX_EVAL_ITEMS) { + return Promise.reject(`File must be less than ${MAX_EVAL_ITEMS} lines`); + } + + const headers = lines[0].split(','); + const qIndex = headers.findIndex((h) => h.trim() === '*q'); + const aIndex = headers.findIndex((h) => h.trim() === '*a'); + const historyIndex = headers.findIndex((h) => h.trim() === 'history'); + + const { usageId } = await createEvaluationUsage({ + teamId, + tmbId, + appName: app.name, + model: data.evalModel + }); + + const evalItems = lines.slice(1).map((line) => { + const values = line.split(','); + const question = values[qIndex]; + const expectedResponse = values[aIndex]; + const history = historyIndex !== -1 ? 
values[historyIndex] : ''; + + const globalVariables = headers.slice(0, qIndex).reduce( + (acc, header, j) => { + const headerName = header.trim().replace(/^\*/, ''); + acc[headerName] = values[j] || ''; + return acc; + }, + {} as Record + ); + + return { + question, + expectedResponse, + history, + globalVariables + }; + }); + + await mongoSessionRun(async (session) => { + const [evaluation] = await MongoEvaluation.create( + [ + { + teamId, + tmbId, + appId: data.appId, + usageId, + evalModel: data.evalModel, + name: data.name + } + ], + { session, ordered: true } + ); + + const evalItemsWithId = evalItems.map((item) => ({ + question: item.question, + expectedResponse: item.expectedResponse, + history: item.history, + globalVariables: item.globalVariables, + evalId: evaluation._id, + status: EvaluationStatusEnum.queuing + })); + await MongoEvalItem.insertMany(evalItemsWithId, { + session, + ordered: false + }); + + await addEvaluationJob({ evalId: evaluation._id }); + }); + + addAuditLog({ + tmbId, + teamId, + event: AuditEventEnum.CREATE_EVALUATION, + params: { + name: data.name, + appName: app.name + } + }); + } catch (error) { + addLog.error(`create evaluation error: ${error}`); + removeFilesByPaths(filePaths); + return Promise.reject(error); + } +} + +export default NextAPI(handler); + +export const config = { + api: { + bodyParser: false + } +}; diff --git a/projects/app/src/pages/api/core/evaluation/delete.ts b/projects/app/src/pages/api/core/evaluation/delete.ts new file mode 100644 index 000000000..2a322b12d --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/delete.ts @@ -0,0 +1,53 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { MongoEvaluation } from '@fastgpt/service/core/evaluation/evalSchema'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { authEval } from 
'@fastgpt/service/support/permission/evaluation/auth'; +import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; +import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; +import { removeEvaluationJob } from '@fastgpt/service/core/evaluation/mq'; +import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun'; +import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'; + +async function handler(req: ApiRequestProps<{}, { evalId: string }>, res: ApiResponseType) { + const { evalId } = req.query; + + const { tmbId, teamId, evaluation } = await authEval({ + req, + per: WritePermissionVal, + evalId, + authToken: true, + authApiKey: true + }); + + await mongoSessionRun(async (session) => { + await MongoEvaluation.deleteOne( + { + _id: evalId + }, + { session } + ); + + await MongoEvalItem.deleteMany( + { + evalId + }, + { session } + ); + + await removeEvaluationJob(evalId); + }); + + addAuditLog({ + tmbId, + teamId, + event: AuditEventEnum.DELETE_EVALUATION, + params: { + name: evaluation.name + } + }); + + return {}; +} + +export default NextAPI(handler); diff --git a/projects/app/src/pages/api/core/evaluation/deleteItem.ts b/projects/app/src/pages/api/core/evaluation/deleteItem.ts new file mode 100644 index 000000000..77182c83f --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/deleteItem.ts @@ -0,0 +1,23 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { authEval } from '@fastgpt/service/support/permission/evaluation/auth'; +import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'; + +async function handler( + req: ApiRequestProps<{}, { evalId: string; itemId: string }>, + res: ApiResponseType +) { + const { evalId, itemId } = req.query; + await authEval({ + req, + per: 
WritePermissionVal, + evalId, + authToken: true, + authApiKey: true + }); + + await MongoEvalItem.deleteOne({ _id: itemId, evalId }); +} + +export default NextAPI(handler); diff --git a/projects/app/src/pages/api/core/evaluation/exportItems.ts b/projects/app/src/pages/api/core/evaluation/exportItems.ts new file mode 100644 index 000000000..05811137d --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/exportItems.ts @@ -0,0 +1,112 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; +import { MongoEvaluation } from '@fastgpt/service/core/evaluation/evalSchema'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { Types } from 'mongoose'; +import { readFromSecondary } from '@fastgpt/service/common/mongo/utils'; +import { authEval } from '@fastgpt/service/support/permission/evaluation/auth'; +import { addAuditLog, getI18nAppType } from '@fastgpt/service/support/user/audit/util'; +import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; +import { generateCsv } from '@fastgpt/service/common/file/csv'; + +export type exportItemsQuery = { + evalId: string; +}; + +export type exportItemsBody = { + title: string; + statusMap: Record; +}; + +async function handler( + req: ApiRequestProps, + res: ApiResponseType +) { + const { evalId } = req.query; + const { title, statusMap } = req.body || {}; + + const { teamId, tmbId } = await authEval({ + req, + per: ReadPermissionVal, + evalId, + authToken: true, + authApiKey: true + }); + + const evaluation = await MongoEvaluation.findById(evalId); + if (!evaluation) { + return Promise.reject('Evaluation task does not exist'); + } + + res.setHeader('Content-Type', 'text/csv; charset=utf-8;'); + res.setHeader( + 'Content-Disposition', + `attachment; filename=${encodeURIComponent(evaluation?.name || 
'evaluation')}.csv;` + ); + + const evalItems = await MongoEvalItem.find( + { + evalId: new Types.ObjectId(evalId) + }, + 'globalVariables question expectedResponse response status accuracy relevance semanticAccuracy score errorMessage', + { + ...readFromSecondary + } + ); + + const allVariableKeys = new Set(); + evalItems.forEach((doc) => { + if (doc.globalVariables) { + Object.keys(doc.globalVariables).forEach((key) => allVariableKeys.add(key)); + } + }); + const variableKeysArray = Array.from(allVariableKeys).sort(); + // title and statusMap come from the optional request body, so guard against either being absent + const baseHeaders = title ? title.split(',') : []; + const headers = [...variableKeysArray, ...baseHeaders]; + + const data = evalItems.map((doc) => { + const question = doc.question || ''; + const expectedResponse = doc.expectedResponse || ''; + const response = doc.response || ''; + + const status = (() => { + if (doc.errorMessage) { + return 'Error'; // Show error when errorMessage exists + } + return statusMap?.[doc.status]?.label || 'Unknown'; + })(); + + const score = !!doc.score ? 
doc.score.toFixed(2) : '0'; + + const variableValues = variableKeysArray.map((key) => { + return doc.globalVariables?.[key] || ''; + }); + + return [...variableValues, question, expectedResponse, response, status, score]; + }); + + const csvContent = generateCsv(headers, data); + + res.write('\uFEFF' + csvContent); + + addAuditLog({ + tmbId, + teamId, + event: AuditEventEnum.EXPORT_EVALUATION, + params: { + name: evaluation.name + } + }); + + res.end(); +} + +export default NextAPI(handler); + +export const config = { + api: { + responseLimit: '100mb' + } +}; diff --git a/projects/app/src/pages/api/core/evaluation/list.ts b/projects/app/src/pages/api/core/evaluation/list.ts new file mode 100644 index 000000000..7be499501 --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/list.ts @@ -0,0 +1,194 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { authUserPer } from '@fastgpt/service/support/permission/user/auth'; +import { + ReadPermissionVal, + PerResourceTypeEnum +} from '@fastgpt/global/support/permission/constant'; +import { MongoEvaluation } from '@fastgpt/service/core/evaluation/evalSchema'; +import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination'; +import { Types } from '@fastgpt/service/common/mongo'; +import type { PaginationResponse } from '@fastgpt/web/common/fetch/type'; +import type { listEvaluationsBody } from '@fastgpt/global/core/evaluation/api'; +import type { EvaluationSchemaType, evaluationType } from '@fastgpt/global/core/evaluation/type'; +import { replaceRegChars } from '@fastgpt/global/common/string/tools'; +import { MongoResourcePermission } from '@fastgpt/service/support/permission/schema'; +import { getGroupsByTmbId } from '@fastgpt/service/support/permission/memberGroup/controllers'; +import { getOrgIdSetWithParentByTmbId } from '@fastgpt/service/support/permission/org/controllers'; +import type { 
TeamMemberSchema } from '@fastgpt/global/support/user/team/type'; +import type { AppSchema } from '@fastgpt/global/core/app/type'; +import { i18nT } from '@fastgpt/web/i18n/utils'; +import { MongoApp } from '@fastgpt/service/core/app/schema'; + +async function handler( + req: ApiRequestProps, + res: ApiResponseType +): Promise> { + const { + teamId, + tmbId, + permission: teamPer + } = await authUserPer({ + req, + authToken: true, + authApiKey: true, + per: ReadPermissionVal + }); + + const { offset, pageSize } = parsePaginationRequest(req); + const { searchKey } = req.body; + + const [perList, myGroupMap, myOrgSet] = await Promise.all([ + MongoResourcePermission.find({ + resourceType: PerResourceTypeEnum.app, + teamId, + resourceId: { + $exists: true + } + }).lean(), + getGroupsByTmbId({ + tmbId, + teamId + }).then((item) => { + const map = new Map(); + item.forEach((item) => { + map.set(String(item._id), 1); + }); + return map; + }), + getOrgIdSetWithParentByTmbId({ + teamId, + tmbId + }) + ]); + const myPerAppIdList = perList + .filter( + (item) => + String(item.tmbId) === String(tmbId) || + myGroupMap.has(String(item.groupId)) || + myOrgSet.has(String(item.orgId)) + ) + .map((item) => new Types.ObjectId(item.resourceId)); + + const myAppIds = await MongoApp.find({ + teamId: new Types.ObjectId(teamId), + $or: [{ tmbId }, { parentId: { $in: myPerAppIdList } }] + }) + .select('_id') + .lean(); + + const match = { + teamId: new Types.ObjectId(teamId), + ...(searchKey && { name: { $regex: new RegExp(`${replaceRegChars(searchKey)}`, 'i') } }), + ...(!teamPer.isOwner && { + appId: { + $in: [...myPerAppIdList, ...myAppIds.map((item) => item._id)] + } + }) + }; + + const [evaluations, total] = await Promise.all([ + MongoEvaluation.aggregate( + buildPipeline(match, offset, pageSize) + ) as unknown as (EvaluationSchemaType & { + teamMember: TeamMemberSchema; + app: AppSchema; + stats: { + totalCount: number; + completedCount: number; + errorCount: number; + avgScore: 
number; + }; + })[], + MongoEvaluation.countDocuments(match) + ]); + + return { + total, + list: evaluations.map((item) => { + const { stats } = item; + const { totalCount = 0, completedCount = 0, errorCount = 0, avgScore } = stats || {}; + + const calculatedScore = totalCount === completedCount ? avgScore || 0 : undefined; + + return { + name: item.name, + appId: String(item.appId), + createTime: item.createTime, + finishTime: item.finishTime, + evalModel: item.evalModel, + errorMessage: item.errorMessage, + score: calculatedScore, + _id: String(item._id), + executorAvatar: item.teamMember?.avatar, + executorName: item.teamMember?.name, + appAvatar: item.app?.avatar, + appName: item.app?.name || i18nT('app:deleted'), + completedCount, + errorCount, + totalCount + }; + }) + }; +} + +const buildPipeline = (match: Record, offset: number, pageSize: number) => [ + { $match: match }, + { $sort: { createTime: -1 as const } }, + { $skip: offset }, + { $limit: pageSize }, + { + $lookup: { + from: 'team_members', + localField: 'tmbId', + foreignField: '_id', + as: 'teamMember' + } + }, + { + $lookup: { + from: 'apps', + localField: 'appId', + foreignField: '_id', + as: 'app' + } + }, + { + $lookup: { + from: 'eval_items', + let: { evalId: '$_id' }, + pipeline: [ + { $match: { $expr: { $eq: ['$evalId', '$$evalId'] } } }, + { + $group: { + _id: null, + totalCount: { $sum: 1 }, + completedCount: { + $sum: { $cond: [{ $eq: ['$status', 2] }, 1, 0] } + }, + errorCount: { + $sum: { + $cond: [{ $ifNull: ['$errorMessage', false] }, 1, 0] + } + }, + avgScore: { + $avg: { + $cond: [{ $ne: ['$score', null] }, '$score', '$$REMOVE'] + } + } + } + } + ], + as: 'evalStats' + } + }, + { + $addFields: { + teamMember: { $arrayElemAt: ['$teamMember', 0] }, + app: { $arrayElemAt: ['$app', 0] }, + stats: { $arrayElemAt: ['$evalStats', 0] } + } + } +]; + +export default NextAPI(handler); diff --git a/projects/app/src/pages/api/core/evaluation/listItems.ts 
b/projects/app/src/pages/api/core/evaluation/listItems.ts new file mode 100644 index 000000000..a1f1078fb --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/listItems.ts @@ -0,0 +1,83 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { Types } from 'mongoose'; +import { authEval } from '@fastgpt/service/support/permission/evaluation/auth'; +import type { listEvalItemsBody } from '@fastgpt/global/core/evaluation/api'; +import type { listEvalItemsItem } from '@fastgpt/global/core/evaluation/type'; +import type { PaginationResponse } from '@fastgpt/web/common/fetch/type'; +import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant'; + +async function handler( + req: ApiRequestProps, + res: ApiResponseType +): Promise> { + const { evalId } = req.body; + await authEval({ + req, + per: ReadPermissionVal, + evalId, + authToken: true, + authApiKey: true + }); + const { offset, pageSize } = parsePaginationRequest(req); + + const aggregationPipeline = [ + { + $match: { + evalId: new Types.ObjectId(evalId) + } + }, + { + $addFields: { + sortStatus: { + $switch: { + branches: [ + { case: { $ifNull: ['$errorMessage', false] }, then: 0 }, + { case: { $eq: ['$status', 1] }, then: 1 }, + { case: { $eq: ['$status', 0] }, then: 2 }, + { case: { $eq: ['$status', 2] }, then: 3 } + ], + default: 4 + } + } + } + }, + { + $sort: { sortStatus: 1 as const, _id: 1 as const } + }, + { + $skip: offset + }, + { + $limit: pageSize + } + ]; + + const [result, total] = await Promise.all([ + MongoEvalItem.aggregate(aggregationPipeline), + MongoEvalItem.countDocuments({ evalId }) + ]); + + return { + total, + list: result.map((item) => ({ + evalItemId: String(item._id), + evalId: String(item.evalId), + 
retry: item.retry, + question: item.question, + expectedResponse: item.expectedResponse, + response: item.response, + globalVariables: item.globalVariables, + status: item.status, + errorMessage: item.errorMessage, + accuracy: item.accuracy, + relevance: item.relevance, + semanticAccuracy: item.semanticAccuracy, + score: item.score + })) + }; +} + +export default NextAPI(handler); diff --git a/projects/app/src/pages/api/core/evaluation/retryItem.ts b/projects/app/src/pages/api/core/evaluation/retryItem.ts new file mode 100644 index 000000000..897df6bfd --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/retryItem.ts @@ -0,0 +1,45 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { authEval } from '@fastgpt/service/support/permission/evaluation/auth'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { checkEvaluationJobActive, addEvaluationJob } from '@fastgpt/service/core/evaluation/mq'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import type { retryEvalItemBody } from '@fastgpt/global/core/evaluation/api'; +import { checkTeamAIPoints } from '@fastgpt/service/support/permission/teamLimit'; +import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'; + +async function handler(req: ApiRequestProps, res: ApiResponseType) { + const { evalItemId } = req.body; + + const evaluationItem = await MongoEvalItem.findById(evalItemId); + if (!evaluationItem) return Promise.reject('evaluationItem not found'); + + const { teamId, evaluation } = await authEval({ + req, + per: WritePermissionVal, + evalId: evaluationItem.evalId, + authToken: true, + authApiKey: true + }); + + await checkTeamAIPoints(teamId); + + await MongoEvalItem.updateOne( + { _id: evalItemId }, + { + $set: { + status: EvaluationStatusEnum.queuing, + errorMessage: null, + response: null, + accuracy: 
null, + relevance: null, + semanticAccuracy: null, + score: null, + retry: 3 + } + } + ); + await addEvaluationJob({ evalId: evaluation._id }); +} + +export default NextAPI(handler); diff --git a/projects/app/src/pages/api/core/evaluation/updateItem.ts b/projects/app/src/pages/api/core/evaluation/updateItem.ts new file mode 100644 index 000000000..cb1a2a32c --- /dev/null +++ b/projects/app/src/pages/api/core/evaluation/updateItem.ts @@ -0,0 +1,49 @@ +import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next'; +import { NextAPI } from '@/service/middleware/entry'; +import { authEval } from '@fastgpt/service/support/permission/evaluation/auth'; +import { MongoEvalItem } from '@fastgpt/service/core/evaluation/evalItemSchema'; +import { addEvaluationJob } from '@fastgpt/service/core/evaluation/mq'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import type { updateEvalItemBody } from '@fastgpt/global/core/evaluation/api'; +import { checkTeamAIPoints } from '@fastgpt/service/support/permission/teamLimit'; +import { WritePermissionVal } from '@fastgpt/global/support/permission/constant'; + +async function handler(req: ApiRequestProps, res: ApiResponseType) { + const { evalItemId, question, expectedResponse, variables } = req.body; + + const evaluationItem = await MongoEvalItem.findById(evalItemId); + if (!evaluationItem) return Promise.reject('evaluationItem not found'); + + // Editing an item rewrites evaluation data and re-queues the job, so require write permission like retryItem does + const { teamId, evaluation } = await authEval({ + req, + per: WritePermissionVal, + evalId: evaluationItem.evalId, + authToken: true, + authApiKey: true + }); + await checkTeamAIPoints(teamId); + + await MongoEvalItem.updateOne( + { _id: evalItemId }, + { + $set: { + question, + expectedResponse, + status: EvaluationStatusEnum.queuing, + errorMessage: null, + response: null, + accuracy: null, + relevance: null, + semanticAccuracy: null, + score: null, + retry: 3, + globalVariables: variables + } + } + ); + + await addEvaluationJob({ evalId: evaluation._id }); +} + +export default NextAPI(handler); diff --git 
a/projects/app/src/pages/dashboard/evaluation/create.tsx b/projects/app/src/pages/dashboard/evaluation/create.tsx index a956719ea..30f0a37dc 100644 --- a/projects/app/src/pages/dashboard/evaluation/create.tsx +++ b/projects/app/src/pages/dashboard/evaluation/create.tsx @@ -20,11 +20,11 @@ import { getAppDetailById } from '@/web/core/app/api'; import { useToast } from '@fastgpt/web/hooks/useToast'; import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip'; import { fileDownload } from '@/web/common/file/utils'; -import { postCreateEvaluation } from '@/web/core/app/api/evaluation'; +import { postCreateEvaluation } from '@/web/core/evaluation/evaluation'; import { useMemo, useState } from 'react'; import Markdown from '@/components/Markdown'; -import { getEvaluationFileHeader } from '@fastgpt/global/core/app/evaluation/utils'; -import { evaluationFileErrors } from '@fastgpt/global/core/app/evaluation/constants'; +import { getEvaluationFileHeader } from '@fastgpt/global/core/evaluation/utils'; +import { evaluationFileErrors } from '@fastgpt/global/core/evaluation/constants'; import { TeamErrEnum } from '@fastgpt/global/common/error/code/team'; import { getErrText } from '@fastgpt/global/common/error/utils'; diff --git a/projects/app/src/pages/dashboard/evaluation/index.tsx b/projects/app/src/pages/dashboard/evaluation/index.tsx index 5cbc16b1e..cb9b9c00e 100644 --- a/projects/app/src/pages/dashboard/evaluation/index.tsx +++ b/projects/app/src/pages/dashboard/evaluation/index.tsx @@ -20,15 +20,15 @@ import SearchInput from '@fastgpt/web/components/common/Input/SearchInput'; import MyIcon from '@fastgpt/web/components/common/Icon'; import { useRouter } from 'next/router'; import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; -import { deleteEvaluation, getEvaluationList } from '@/web/core/app/api/evaluation'; +import { deleteEvaluation, getEvaluationList } from '@/web/core/evaluation/evaluation'; import { formatTime2YMDHM } from 
'@fastgpt/global/common/string/time';
 import Avatar from '@fastgpt/web/components/common/Avatar';
 import { usePagination } from '@fastgpt/web/hooks/usePagination';
 import { useState, useEffect, useMemo } from 'react';
-import EvaluationDetailModal from '../../../pageComponents/app/evaluation/DetailModal';
+import EvaluationDetailModal from '@/pageComponents/evaluation/DetailModal';
 import { useSystem } from '@fastgpt/web/hooks/useSystem';
 import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
-import type { evaluationType } from '@fastgpt/global/core/app/evaluation/type';
+import type { evaluationType } from '@fastgpt/global/core/evaluation/type';
 import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
 import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConfirm';
 
diff --git a/projects/app/src/web/core/app/api/evaluation.ts b/projects/app/src/web/core/evaluation/evaluation.ts
similarity index 67%
rename from projects/app/src/web/core/app/api/evaluation.ts
rename to projects/app/src/web/core/evaluation/evaluation.ts
index 5caf12f10..814c053e1 100644
--- a/projects/app/src/web/core/app/api/evaluation.ts
+++ b/projects/app/src/web/core/evaluation/evaluation.ts
@@ -4,8 +4,8 @@ import type {
   listEvaluationsBody,
   retryEvalItemBody,
   updateEvalItemBody
-} from '@fastgpt/global/core/app/evaluation/api';
-import type { evaluationType, listEvalItemsItem } from '@fastgpt/global/core/app/evaluation/type';
+} from '@fastgpt/global/core/evaluation/api';
+import type { evaluationType, listEvalItemsItem } from '@fastgpt/global/core/evaluation/type';
 import type { PaginationResponse } from '@fastgpt/web/common/fetch/type';
 
 export const postCreateEvaluation = ({
@@ -25,7 +25,7 @@ export const postCreateEvaluation = ({
   formData.append('file', file, encodeURIComponent(file.name));
   formData.append('data', JSON.stringify({ name, evalModel, appId }));
 
-  return POST(`/proApi/core/app/evaluation/create`, formData, {
+  return POST(`/core/evaluation/create`, formData, {
     timeout: 600000,
     onUploadProgress: (e) => {
       if (!e.total) return;
@@ -40,19 +40,24 @@ export const postCreateEvaluation = ({
 };
 
+/** Requests one page of evaluations via POST `/core/evaluation/list`. */
 export const getEvaluationList = (data: listEvaluationsBody) =>
-  POST<PaginationResponse<evaluationType>>('/proApi/core/app/evaluation/list', data);
+  POST<PaginationResponse<evaluationType>>('/core/evaluation/list', data);
 
+/** Sends DELETE `/core/evaluation/delete` for the evaluation given by `evalId`. */
 export const deleteEvaluation = (data: { evalId: string }) =>
-  DELETE('/proApi/core/app/evaluation/delete', data);
+  DELETE('/core/evaluation/delete', data);
 
+/** Requests one page of evaluation items via POST `/core/evaluation/listItems`. */
 export const getEvalItemsList = (data: listEvalItemsBody) =>
-  POST<PaginationResponse<listEvalItemsItem>>('/proApi/core/app/evaluation/listItems', data);
+  POST<PaginationResponse<listEvalItemsItem>>('/core/evaluation/listItems', data);
 
+/** Sends DELETE `/core/evaluation/deleteItem` for the item given by `evalItemId`. */
 export const deleteEvalItem = (data: { evalItemId: string }) =>
-  DELETE('/proApi/core/app/evaluation/deleteItem', data);
+  DELETE('/core/evaluation/deleteItem', data);
 
+/** Posts a `retryEvalItemBody` to `/core/evaluation/retryItem`. */
-export const retryEvalItem = (data: retryEvalItemBody) =>
-  POST('/proApi/core/app/evaluation/retryItem', data);
+export const retryEvalItem = (data: retryEvalItemBody) => POST('/core/evaluation/retryItem', data);
 
+/** Posts an `updateEvalItemBody` to `/core/evaluation/updateItem`. */
 export const updateEvalItem = (data: updateEvalItemBody) =>
-  POST('/proApi/core/app/evaluation/updateItem', data);
+  POST('/core/evaluation/updateItem', data);