Mirror of https://github.com/labring/FastGPT.git (synced 2025-12-26 04:32:50 +00:00)
Adds a lightweight evaluation framework for app-level tracking and benchmarking.

- Changes: 28 files, +1455 additions, -66 deletions
- Branch: add-evaluations -> main
- PR: https://github.com/chanzhi82020/FastGPT/pull/1

Applications built on FastGPT need repeatable, comparable benchmarks to measure regressions, track improvements, and validate releases. This initial implementation provides the primitives to define evaluation scenarios, run them against app endpoints or model components, and persist results for later analysis.

I updated the PR description to emphasize that the evaluation system targets FastGPT-built apps and expanded the explanation of the core pieces so reviewers understand the scope and intended use. The new description outlines the feature intent, core components, and how results are captured and aggregated for benchmarking. A sketch of how these pieces could fit together follows this description.

- Evaluation definitions
  - Define evaluation tasks that reference an app (app id, version, endpoint), test datasets or input cases, expected outputs (when applicable), and run configuration (parallelism, timeouts).
  - Support for custom metric plugins so teams can add domain-specific measures.
- Runner / Executor
  - Executes evaluation cases against app endpoints or internal model interfaces.
  - Captures raw responses, response times, status codes, and any runtime errors.
  - Computes per-case metrics (e.g., correctness, latency) immediately after each case run.
- Metrics & Aggregation
  - Built-in metrics: accuracy/success rate, latency (p50/p90/p99), throughput, error rate.
  - Aggregation produces per-run summaries and per-app historical summaries for trend analysis.
  - Allows combining metrics into composite scores for high-level benchmarking.
- Persistence & Logging
  - Stores run results, input/output pairs (when needed), timestamps, environment info, and app/version metadata so runs are reproducible and auditable.
  - Logs are retained to facilitate debugging and root-cause analysis of regressions.
- Reporting & Comparison
  - Produces aggregated reports suitable for CI gating, release notes, or dashboards.
  - Supports comparing multiple app versions or deployments side by side.
- Extensibility & Integration
  - Designed to plug into CI (automated runs on PRs or releases), dashboards, and downstream analysis tools.
  - Easy to add new metrics, evaluators, or dataset connectors.

By centering the evaluation system on FastGPT apps, teams can benchmark full application behavior (not only raw model outputs), correlate metrics with deployment configurations, and make informed release decisions.

Next steps:

- Expand the built-in metric suite (e.g., F1, BLEU/ROUGE where applicable), add dataset connectors, and provide example evaluation scenarios for sample apps.
- Integrate with CI pipelines and add basic dashboarding for trend visualization.

Related Issue: N/A

Co-authored-by: Archer <545436317@qq.com>
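A minimal TypeScript sketch, not the code shipped in this PR, of how the primitives above could fit together: an evaluation task definition, a per-case runner that captures output, latency, and errors, and a run-level aggregation step. All names (EvalTask, runCase, summarize), the JSON POST shape sent to the app endpoint, and the use of Node 18+ globals (fetch, AbortSignal.timeout) are assumptions for illustration.

```ts
type EvalCase = { input: string; expected?: string };

// Evaluation definition: references an app and carries the dataset plus run configuration.
type EvalTask = {
  appId: string;
  appVersion: string;
  endpoint: string;      // app endpoint to call
  cases: EvalCase[];     // test dataset / input cases
  concurrency: number;   // parallelism
  timeoutMs: number;     // per-case timeout
};

// Per-case result captured by the runner.
type CaseResult = {
  input: string;
  output?: string;
  latencyMs: number;
  ok: boolean;           // correctness when an expected output exists, otherwise HTTP success
  error?: string;
};

// Per-run summary used for benchmarking and trend analysis.
type RunSummary = {
  appId: string;
  appVersion: string;
  successRate: number;
  errorRate: number;
  p50LatencyMs: number;
  p99LatencyMs: number;
  finishedAt: string;
};

// Runner: execute one case against the app endpoint, capturing response, latency, and errors.
async function runCase(task: EvalTask, c: EvalCase): Promise<CaseResult> {
  const start = Date.now();
  try {
    const res = await fetch(task.endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ appId: task.appId, input: c.input }),
      signal: AbortSignal.timeout(task.timeoutMs)
    });
    const output = await res.text();
    const matches = c.expected === undefined || output.trim() === c.expected.trim();
    return { input: c.input, output, latencyMs: Date.now() - start, ok: res.ok && matches };
  } catch (err) {
    return { input: c.input, latencyMs: Date.now() - start, ok: false, error: String(err) };
  }
}

// Aggregation: fold per-case results into a per-run summary.
function summarize(task: EvalTask, results: CaseResult[]): RunSummary {
  const latencies = results.map((r) => r.latencyMs).sort((a, b) => a - b);
  const pct = (p: number) =>
    latencies[Math.min(latencies.length - 1, Math.floor((p / 100) * latencies.length))] ?? 0;
  return {
    appId: task.appId,
    appVersion: task.appVersion,
    successRate: results.filter((r) => r.ok).length / Math.max(results.length, 1),
    errorRate: results.filter((r) => r.error).length / Math.max(results.length, 1),
    p50LatencyMs: pct(50),
    p99LatencyMs: pct(99),
    finishedAt: new Date().toISOString()
  };
}
```

Persistence, reporting, comparison, and custom metric plugins would build on these per-case and per-run records.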
LOG_DEPTH=3
# Default root user password (username: root); it is reset to this value automatically on every restart.
DEFAULT_ROOT_PSW=123456
# Maximum number of database connections
DB_MAX_LINK=5
TOKEN_KEY=fastgpt
# Key used when reading files
FILE_TOKEN_KEY=filetokenkey
# Key used to encrypt secrets
AES256_SECRET_KEY=fastgptsecret
# Root key, highest privilege
ROOT_KEY=fdafasd
# Force images to be converted to base64 before being passed to the model
MULTIPLE_DATA_TO_BASE64=true

# Temporary workaround: per-request concurrency (chunk size) for the embedding model
EMBEDDING_CHUNK_SIZE=10

# Whether to hide the copyright setting; it is hidden only when the value is 'true'
HIDE_CHAT_COPYRIGHT_SETTING=

# Service url
# Commercial (Pro) edition address
PRO_URL=
# Plugin
PLUGIN_BASE_URL=
PLUGIN_TOKEN=
# code sandbox url
SANDBOX_URL=http://localhost:3001
# ai proxy api
AIPROXY_API_ENDPOINT=https://xxx.come
AIPROXY_API_TOKEN=xxxxx
# OpenAI Base URL
OPENAI_BASE_URL=https://api.openai.com/v1
CHAT_API_KEY=sk-xxxx

# Redis URL
REDIS_URL=redis://default:password@127.0.0.1:6379
# MongoDB connection string. When developing locally against a remote database, you may need to add the directConnection=true parameter to connect.
MONGODB_URI=mongodb://username:password@0.0.0.0:27017/fastgpt?authSource=admin
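# Illustrative example only (remote-host is a placeholder): a remote-database URI with directConnection appended looks like
# MONGODB_URI=mongodb://username:password@remote-host:27017/fastgpt?authSource=admin&directConnection=true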
# Log database
MONGODB_LOG_URI=mongodb://username:password@0.0.0.0:27017/fastgpt?authSource=admin
# Vector store priority: pg > oceanbase > milvus
# PostgreSQL vector store connection string
PG_URL=postgresql://username:password@host:port/postgres
# OceanBase vector store connection string
OCEANBASE_URL=
# Milvus vector store connection parameters
MILVUS_ADDRESS=
MILVUS_TOKEN=


# Front-end page address, used as the domain for resolving relative resource paths; do not add a trailing /
FE_DOMAIN=http://localhost:3000
# File domain. It also points to the FastGPT service, but for stronger security you can assign it a separate domain so that high-risk files cannot read content from the main domain.
FILE_DOMAIN=http://localhost:3000
# Sub-path (secondary route); must be fixed at build time
# NEXT_PUBLIC_BASE_URL=/fastai

# Log level: debug, info, warn, error
LOG_LEVEL=debug
STORE_LOG_LEVEL=warn

# Signoz
SIGNOZ_BASE_URL=
SIGNOZ_SERVICE_NAME=
SIGNOZ_STORE_LEVEL=warn

# Security settings
# Chat files expire after n days
CHAT_FILE_EXPIRE_TIME=7
# Enable IP rate limiting (true). Some APIs apply an IP rate-limit policy to block abnormal requests.
USE_IP_LIMIT=false
# Maximum number of workflow runs, to avoid extreme infinite-loop cases
WORKFLOW_MAX_RUN_TIMES=500
# Maximum number of loop iterations, to avoid extreme infinite-loop cases
WORKFLOW_MAX_LOOP_TIMES=50
# Enable internal (private) IP checks
CHECK_INTERNAL_IP=false
# Lockout duration after wrong-password attempts, in seconds
PASSWORD_LOGIN_LOCK_SECONDS=
# Password expiration in months; passwords never expire if unset
PASSWORD_EXPIRED_MONTH=
# Maximum number of logged-in clients, default 10
MAX_LOGIN_SESSION=

# Special settings
# Custom allowed CORS origins (comma separated); if unset, all origins are allowed by default
ALLOWED_ORIGINS=
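# Illustrative example only (the domains are placeholders):
# ALLOWED_ORIGINS=https://app.example.com,https://admin.example.com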
# Whether to show the redemption (coupon) code feature
SHOW_COUPON=false
# Custom config.json path
CONFIG_JSON_PATH=

# Chat log push service
# # Log service address
# CHAT_LOG_URL=http://localhost:8080
# # Log push interval
# CHAT_LOG_INTERVAL=10000
# # Log source ID prefix
# CHAT_LOG_SOURCE_ID_PREFIX=fastgpt-


# evaluations settings
EVAL_CONCURRENCY=3 # the number of concurrent evaluation tasks
EVAL_LINE_LIMIT=1000 # the maximum number of lines in an uploaded eval data file
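As a rough illustration of how the two evaluation settings above could be consumed, here is a minimal TypeScript sketch that reads EVAL_CONCURRENCY and EVAL_LINE_LIMIT from the environment and runs eval cases through a simple promise pool. The names (EvalCase, runAllCases, runEvalCase) are hypothetical, not part of FastGPT's actual implementation.

```ts
// Hypothetical sketch: throttling evaluation case runs with the
// EVAL_CONCURRENCY and EVAL_LINE_LIMIT environment variables above.
type EvalCase = { input: string; expected?: string };

const EVAL_CONCURRENCY = Number(process.env.EVAL_CONCURRENCY ?? 3);
const EVAL_LINE_LIMIT = Number(process.env.EVAL_LINE_LIMIT ?? 1000);

async function runAllCases(
  cases: EvalCase[],
  runEvalCase: (c: EvalCase) => Promise<void> // illustrative callback, not a FastGPT API
): Promise<void> {
  // Reject files that exceed the configured line limit.
  if (cases.length > EVAL_LINE_LIMIT) {
    throw new Error(`Eval data file exceeds ${EVAL_LINE_LIMIT} lines`);
  }

  // Simple promise pool: keep at most EVAL_CONCURRENCY cases in flight.
  let next = 0;
  const workers = Array.from({ length: EVAL_CONCURRENCY }, async () => {
    while (next < cases.length) {
      const current = cases[next++];
      await runEvalCase(current);
    }
  });
  await Promise.all(workers);
}
```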