perf: vector count api

This commit is contained in:
archer 2025-12-09 17:56:29 +08:00
parent 397ffc6c19
commit 8ab756ab22
No known key found for this signature in database
GPG Key ID: 4446499B846D4A9E
14 changed files with 437 additions and 126 deletions

View File

@ -0,0 +1,22 @@
import { createDocument } from 'zod-openapi';
import { DashboardPath } from './admin/core/dashboard';
import { TagsMap } from './tag';
/**
 * OpenAPI 3.1.0 document describing the FastGPT admin API.
 *
 * Produced by `createDocument`; its path table is the spread of
 * `DashboardPath`, and `/api` is declared as the only server URL.
 */
export const adminOpenAPIDocument = createDocument({
  openapi: '3.1.0',
  info: {
    title: 'FastGPT Admin API',
    version: '0.1.0',
    description: 'FastGPT Admin API 文档'
  },
  // Path tables are merged here by spreading.
  paths: { ...DashboardPath },
  servers: [{ url: '/api' }],
  // Tag grouping: a single group holding the admin dashboard tag.
  'x-tagGroups': [{ name: '仪表盘', tags: [TagsMap.adminDashboard] }]
});

View File

@ -0,0 +1,105 @@
import { z } from 'zod';
import { UsageSourceEnum } from '../../../../support/wallet/usage/constants';
// Common query schema

/**
 * Common query schema for dashboard chart requests.
 *
 * - `startTime`: start of the query window, passed as a string
 *   (the description states ISO 8601 format).
 * - `sources`: optional array of `UsageSourceEnum` values used as a filter.
 */
export const GetDataChartsQuerySchema = z.object({
  // Description text keeps its parentheses balanced: "(ISO 8601 格式)".
  startTime: z.string().meta({ description: '查询起始时间(ISO 8601 格式)' }),
  sources: z.array(z.enum(UsageSourceEnum)).optional().meta({ description: '使用来源筛选' })
});

/** Static type inferred from {@link GetDataChartsQuerySchema}. */
export type GetDataChartsQueryType = z.infer<typeof GetDataChartsQuerySchema>;
// User registration chart: response schemas

/** One point of the registration series: a date and the users registered on it. */
export const RegisteredUserCountSchema = z.object({
  date: z.string().meta({
    description: '注册日期'
  }),
  count: z.number().meta({
    description: '该日期注册的用户数'
  })
});

/**
 * Response schema for the user registration chart: the user total before the
 * start time plus the registration time series.
 */
export const GetUserFormDataResponseSchema = z.object({
  startUserCount: z.number().meta({
    description: '起始时间之前的用户总数'
  }),
  registeredUserCount: z.array(RegisteredUserCountSchema).meta({
    description: '用户注册时间序列'
  })
});

/** Static type inferred from {@link GetUserFormDataResponseSchema}. */
export type GetUserFormDataResponseType = z.infer<typeof GetUserFormDataResponseSchema>;
// Payment chart: response schemas

/** One point of the order series: total and successful order counts for a date. */
export const OrderAmountSchema = z.object({
  date: z.string().meta({
    description: '数据点日期'
  }),
  totalCount: z.number().meta({
    description: '订单总数'
  }),
  successCount: z.number().meta({
    description: '成功订单数'
  })
});

/** One point of the payment series; `totalCount` is described as the total amount paid. */
export const PayAmountSchema = z.object({
  date: z.string().meta({
    description: '数据点日期'
  }),
  totalCount: z.number().meta({
    description: '支付总金额'
  })
});

/** One point of the paying-team series: number of paying teams for a date. */
export const PayTeamSchema = z.object({
  date: z.string().meta({
    description: '数据点日期'
  }),
  totalCount: z.number().meta({
    description: '支付团队数'
  })
});

/** Response schema for the payment chart: order, payment-amount and paying-team series. */
export const GetPaysFormDataResponseSchema = z.object({
  orderAmounts: z.array(OrderAmountSchema).meta({
    description: '订单数量时间序列'
  }),
  payAmounts: z.array(PayAmountSchema).meta({
    description: '支付金额时间序列'
  }),
  payTeams: z.array(PayTeamSchema).meta({
    description: '支付团队时间序列'
  })
});

/** Static type inferred from {@link GetPaysFormDataResponseSchema}. */
export type GetPaysFormDataResponseType = z.infer<typeof GetPaysFormDataResponseSchema>;
// Chat chart: response schemas

/** One point of the chat series: total number of chats for a date. */
export const ChatAmountSchema = z.object({
  date: z.string().meta({
    description: '数据点日期'
  }),
  totalCount: z.number().meta({
    description: '对话总数'
  })
});

/** One point of the chat-message series: total messages and average messages per chat. */
export const ChatItemAmountSchema = z.object({
  date: z.string().meta({
    description: '数据点日期'
  }),
  totalCount: z.number().meta({
    description: '对话消息总数'
  }),
  averageCount: z.number().meta({
    description: '每个对话的平均消息数'
  })
});

/** Response schema for the chat chart: chat-count and chat-message-count series. */
export const GetChatFormDataResponseSchema = z.object({
  chatAmounts: z.array(ChatAmountSchema).meta({
    description: '对话数量时间序列'
  }),
  chatItemAmounts: z.array(ChatItemAmountSchema).meta({
    description: '对话消息数量时间序列'
  })
});

/** Static type inferred from {@link GetChatFormDataResponseSchema}. */
export type GetChatFormDataResponseType = z.infer<typeof GetChatFormDataResponseSchema>;
// QPM range distribution: response schemas

/** One bucket of the distribution: a QPM range label and the number of teams in it. */
export const QpmRangeSchema = z.object({
  range: z.string().meta({
    description: 'QPM 范围标签'
  }),
  count: z.number().meta({
    description: '范围内的团队数量'
  })
});

/** Response schema for the QPM range distribution: the list of range buckets. */
export const GetQpmRangeResponseSchema = z.object({
  ranges: z.array(QpmRangeSchema).meta({
    description: 'QPM 范围统计列表'
  })
});

/** Static type inferred from {@link GetQpmRangeResponseSchema}. */
export type GetQpmRangeResponseType = z.infer<typeof GetQpmRangeResponseSchema>;
// Cost chart: response schemas

/** One point of the points-usage series: total points used for a date. */
export const PointUsageSchema = z.object({
  date: z.string().meta({
    description: '数据点日期'
  }),
  totalCount: z.number().meta({
    description: '积分使用总数'
  })
});

/** Response schema for the cost chart: the points-usage time series. */
export const GetCostFormDataResponseSchema = z.object({
  pointUsages: z.array(PointUsageSchema).meta({
    description: '积分使用时间序列'
  })
});

/** Static type inferred from {@link GetCostFormDataResponseSchema}. */
export type GetCostFormDataResponseType = z.infer<typeof GetCostFormDataResponseSchema>;
// Global user statistics: response schema

/** Response schema for the global user statistics: total users and total recharges. */
export const GetUserStatsResponseSchema = z.object({
  usersCount: z.number().meta({
    description: '用户总数'
  }),
  rechargeCount: z.number().meta({
    description: '充值总数'
  })
});

/** Static type inferred from {@link GetUserStatsResponseSchema}. */
export type GetUserStatsResponseType = z.infer<typeof GetUserStatsResponseSchema>;
// Global app statistics: response schema

/**
 * Response schema for the global app statistics: one total per app kind
 * (workflow, simple app, workflow tool, HTTP tool, MCP tool).
 */
export const GetAppStatsResponseSchema = z.object({
  workflowCount: z.number().meta({
    description: '工作流总数'
  }),
  simpleAppCount: z.number().meta({
    description: '简易应用总数'
  }),
  workflowToolCount: z.number().meta({
    description: '工作流工具总数'
  }),
  httpToolCount: z.number().meta({
    description: 'HTTP 工具总数'
  }),
  mcpToolCount: z.number().meta({
    description: 'MCP 工具总数'
  })
});

/** Static type inferred from {@link GetAppStatsResponseSchema}. */
export type GetAppStatsResponseType = z.infer<typeof GetAppStatsResponseSchema>;
// Global dataset statistics: response schema

/**
 * Response schema for the global dataset statistics: one total per dataset
 * kind (common, website sync, API, Yuque, Feishu) plus the total index count.
 */
export const GetDatasetStatsResponseSchema = z.object({
  commonDatasetCount: z.number().meta({
    description: '通用知识库总数'
  }),
  websiteDatasetCount: z.number().meta({
    description: 'Web 站点同步总数'
  }),
  apiDatasetCount: z.number().meta({
    description: 'API 知识库总数'
  }),
  yuqueDatasetCount: z.number().meta({
    description: '语雀知识库总数'
  }),
  feishuDatasetCount: z.number().meta({
    description: '飞书知识库总数'
  }),
  totalIndexCount: z.number().meta({
    description: '索引总量'
  })
});

/** Static type inferred from {@link GetDatasetStatsResponseSchema}. */
export type GetDatasetStatsResponseType = z.infer<typeof GetDatasetStatsResponseSchema>;

View File

@ -0,0 +1,190 @@
import { z } from 'zod';
import type { OpenAPIPath } from '../../../type';
import {
GetDataChartsQuerySchema,
GetChatFormDataResponseSchema,
GetCostFormDataResponseSchema,
GetPaysFormDataResponseSchema,
GetUserFormDataResponseSchema,
GetQpmRangeResponseSchema,
GetUserStatsResponseSchema,
GetAppStatsResponseSchema,
GetDatasetStatsResponseSchema
} from './api';
import { TagsMap } from '../../../tag';
export * from './api';
/**
 * Builds the query-parameter schema for the dashboard GET endpoints that take
 * only a `startTime` string.
 *
 * A new schema object is created on every call, so each path keeps its own
 * instance exactly as when the schema was written inline at every call site.
 */
const createStartTimeQuerySchema = () =>
  z.object({
    startTime: z.string().meta({
      description: '查询起始时间(ISO 8601 格式)'
    })
  });

/**
 * OpenAPI path table for the admin dashboard endpoints.
 *
 * Every entry is tagged with `TagsMap.adminDashboard` and declares a single
 * `200` JSON response whose schema comes from `./api`. The chart endpoints
 * served over GET accept a `startTime` query parameter; `getCostFormData` is a
 * POST whose JSON body follows `GetDataChartsQuerySchema`.
 */
export const DashboardPath: OpenAPIPath = {
  // ---- Global statistics (no request parameters) ----
  '/admin/core/dashboard/getUserStats': {
    get: {
      summary: '获取用户全局统计',
      description: '获取用户总数和充值总数',
      tags: [TagsMap.adminDashboard],
      responses: {
        200: {
          description: '成功获取用户统计',
          content: {
            'application/json': {
              schema: GetUserStatsResponseSchema
            }
          }
        }
      }
    }
  },
  '/admin/core/dashboard/getAppStats': {
    get: {
      summary: '获取应用全局统计',
      description: '获取工作流、简易应用、工作流工具、HTTP 工具和 MCP 工具的总数',
      tags: [TagsMap.adminDashboard],
      responses: {
        200: {
          description: '成功获取应用统计',
          content: {
            'application/json': {
              schema: GetAppStatsResponseSchema
            }
          }
        }
      }
    }
  },
  '/admin/core/dashboard/getDatasetStats': {
    get: {
      summary: '获取知识库全局统计',
      description: '获取通用知识库、Web 站点同步、API、语雀、飞书知识库的总数以及索引总量',
      tags: [TagsMap.adminDashboard],
      responses: {
        200: {
          description: '成功获取知识库统计',
          content: {
            'application/json': {
              schema: GetDatasetStatsResponseSchema
            }
          }
        }
      }
    }
  },

  // ---- Time-series charts ----
  '/admin/core/dashboard/getChatFormData': {
    get: {
      summary: '获取对话统计数据',
      description: '获取对话数量和对话消息数量的时间序列统计数据',
      tags: [TagsMap.adminDashboard],
      requestParams: {
        query: createStartTimeQuerySchema()
      },
      responses: {
        200: {
          description: '成功获取对话统计数据',
          content: {
            'application/json': {
              schema: GetChatFormDataResponseSchema
            }
          }
        }
      }
    }
  },
  '/admin/core/dashboard/getWorkflowQpmRange': {
    get: {
      summary: '获取工作流 QPM 范围分布',
      description: '按团队最大 QPM 统计各范围的团队数量',
      tags: [TagsMap.adminDashboard],
      requestParams: {
        query: createStartTimeQuerySchema()
      },
      responses: {
        200: {
          description: '成功获取 QPM 范围分布',
          content: {
            'application/json': {
              schema: GetQpmRangeResponseSchema
            }
          }
        }
      }
    }
  },
  '/admin/core/dashboard/getCostFormData': {
    // The only POST here: the filter (startTime + optional sources) travels in the JSON body.
    post: {
      summary: '获取消费统计数据',
      description: '获取积分消耗的时间序列统计数据',
      tags: [TagsMap.adminDashboard],
      requestBody: {
        content: {
          'application/json': {
            schema: GetDataChartsQuerySchema
          }
        }
      },
      responses: {
        200: {
          description: '成功获取消费统计数据',
          content: {
            'application/json': {
              schema: GetCostFormDataResponseSchema
            }
          }
        }
      }
    }
  },
  '/admin/core/dashboard/getPaysFormData': {
    get: {
      summary: '获取支付统计数据',
      description: '获取订单和支付金额的时间序列统计数据',
      tags: [TagsMap.adminDashboard],
      requestParams: {
        query: createStartTimeQuerySchema()
      },
      responses: {
        200: {
          description: '成功获取支付统计数据',
          content: {
            'application/json': {
              schema: GetPaysFormDataResponseSchema
            }
          }
        }
      }
    }
  },
  '/admin/core/dashboard/getUserFormData': {
    get: {
      summary: '获取用户注册统计数据',
      description: '获取用户注册数量的时间序列统计数据',
      tags: [TagsMap.adminDashboard],
      requestParams: {
        query: createStartTimeQuerySchema()
      },
      responses: {
        200: {
          description: '成功获取用户统计数据',
          content: {
            'application/json': {
              schema: GetUserFormDataResponseSchema
            }
          }
        }
      }
    }
  }
};

View File

@ -8,5 +8,7 @@ export const TagsMap = {
pluginTeam: '团队插件管理',
apiKey: 'APIKey',
walletBill: '订单',
walletDiscountCoupon: '优惠券'
walletDiscountCoupon: '优惠券',
adminDashboard: '管理员仪表盘'
};

View File

@ -11,19 +11,24 @@ const TrackSchema = new Schema({
data: Object
});
try {
TrackSchema.index({ event: 1 });
TrackSchema.index(
{ event: 1, teamId: 1, 'data.datasetId': 1, createTime: -1 },
{
partialFilterExpression: {
'data.datasetId': { $exists: true }
}
TrackSchema.index({ event: 1 });
// Dataset search index
TrackSchema.index(
{ event: 1, teamId: 1, 'data.datasetId': 1, createTime: -1 },
{
partialFilterExpression: {
event: TrackEnum.datasetSearch
}
);
} catch (error) {
console.log(error);
}
}
);
// QPM index
TrackSchema.index(
{ event: 1, createTime: -1 },
{
partialFilterExpression: {
event: TrackEnum.teamChatQPM
}
}
);
export const TrackModel = getMongoModel<TrackSchemaType>('tracks', TrackSchema);

View File

@ -64,13 +64,14 @@ export const recallFromVectorStore = (props: EmbeddingRecallCtrlProps) =>
retryFn(() => Vector.embRecall(props));
export const getVectorDataByTime = Vector.getVectorDataByTime;
// Count vector
export const getVectorCountByTeamId = async (teamId: string) => {
const cacheCount = await teamVectorCache.get(teamId);
if (cacheCount !== undefined) {
return cacheCount;
}
const count = await Vector.getVectorCountByTeamId(teamId);
const count = await Vector.getVectorCount({ teamId });
teamVectorCache.set({
teamId,
@ -79,9 +80,7 @@ export const getVectorCountByTeamId = async (teamId: string) => {
return count;
};
export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
export const getVectorCount = Vector.getVectorCount;
export const insertDatasetDataVector = async ({
model,

View File

@ -257,43 +257,36 @@ export class MilvusCtrl {
};
};
getVectorCountByTeamId = async (teamId: string) => {
getVectorCount = async (props: {
teamId?: string;
datasetId?: string;
collectionId?: string;
}) => {
const { teamId, datasetId, collectionId } = props;
const client = await this.getClient();
// Build filter conditions dynamically (each condition wrapped in parentheses)
const filterConditions: string[] = [];
if (teamId) {
filterConditions.push(`(teamId == "${String(teamId)}")`);
}
if (datasetId) {
filterConditions.push(`(datasetId == "${String(datasetId)}")`);
}
if (collectionId) {
filterConditions.push(`(collectionId == "${String(collectionId)}")`);
}
// If no conditions provided, count all (empty filter)
const filter = filterConditions.length > 0 ? filterConditions.join(' and ') : '';
const result = await client.query({
collection_name: DatasetVectorTableName,
output_fields: ['count(*)'],
filter: `teamId == "${String(teamId)}"`
});
const total = result.data?.[0]?.['count(*)'] as number;
return total;
};
getVectorCountByDatasetId = async (teamId: string, datasetId: string) => {
const client = await this.getClient();
const result = await client.query({
collection_name: DatasetVectorTableName,
output_fields: ['count(*)'],
filter: `(teamId == "${String(teamId)}") and (dataset == "${String(datasetId)}")`
});
const total = result.data?.[0]?.['count(*)'] as number;
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const client = await this.getClient();
const result = await client.query({
collection_name: DatasetVectorTableName,
output_fields: ['count(*)'],
filter: `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")`
filter: filter || undefined
});
const total = result.data?.[0]?.['count(*)'] as number;

View File

@ -180,33 +180,34 @@ export class ObVectorCtrl {
datasetId: item.dataset_id
}));
};
getVectorCountByTeamId = async (teamId: string) => {
const total = await ObClient.count(DatasetVectorTableName, {
where: [['team_id', String(teamId)]]
});
return total;
};
getVectorCountByDatasetId = async (teamId: string, datasetId: string) => {
const total = await ObClient.count(DatasetVectorTableName, {
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
});
getVectorCount = async (props: {
teamId?: string;
datasetId?: string;
collectionId?: string;
}) => {
const { teamId, datasetId, collectionId } = props;
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
// Build where conditions dynamically
const whereConditions: any[] = [];
if (teamId) {
whereConditions.push(['team_id', String(teamId)]);
}
if (datasetId) {
if (whereConditions.length > 0) whereConditions.push('and');
whereConditions.push(['dataset_id', String(datasetId)]);
}
if (collectionId) {
if (whereConditions.length > 0) whereConditions.push('and');
whereConditions.push(['collection_id', String(collectionId)]);
}
// If no conditions provided, count all
const total = await ObClient.count(DatasetVectorTableName, {
where: [
['team_id', String(teamId)],
'and',
['dataset_id', String(datasetId)],
'and',
['collection_id', String(collectionId)]
]
where: whereConditions.length > 0 ? whereConditions : undefined
});
return total;

View File

@ -204,33 +204,34 @@ export class PgVectorCtrl {
datasetId: item.dataset_id
}));
};
getVectorCountByTeamId = async (teamId: string) => {
const total = await PgClient.count(DatasetVectorTableName, {
where: [['team_id', String(teamId)]]
});
return total;
};
getVectorCountByDatasetId = async (teamId: string, datasetId: string) => {
const total = await PgClient.count(DatasetVectorTableName, {
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
});
getVectorCount = async (props: {
teamId?: string;
datasetId?: string;
collectionId?: string;
}) => {
const { teamId, datasetId, collectionId } = props;
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
// Build where conditions dynamically
const whereConditions: any[] = [];
if (teamId) {
whereConditions.push(['team_id', String(teamId)]);
}
if (datasetId) {
if (whereConditions.length > 0) whereConditions.push('and');
whereConditions.push(['dataset_id', String(datasetId)]);
}
if (collectionId) {
if (whereConditions.length > 0) whereConditions.push('and');
whereConditions.push(['collection_id', String(collectionId)]);
}
// If no conditions provided, count all
const total = await PgClient.count(DatasetVectorTableName, {
where: [
['team_id', String(teamId)],
'and',
['dataset_id', String(datasetId)],
'and',
['collection_id', String(collectionId)]
]
where: whereConditions.length > 0 ? whereConditions : undefined
});
return total;

View File

@ -126,7 +126,6 @@ const AppSchema = new Schema(
}
);
AppSchema.index({ type: 1 });
AppSchema.index({ teamId: 1, updateTime: -1 });
AppSchema.index({ teamId: 1, type: 1 });
AppSchema.index(
@ -137,5 +136,7 @@ AppSchema.index(
}
}
);
// Admin count
AppSchema.index({ type: 1 });
export const MongoApp = getMongoModel<AppType>(AppCollectionName, AppSchema);

View File

@ -148,7 +148,7 @@ const DatasetSchema = new Schema({
try {
DatasetSchema.index({ teamId: 1 });
DatasetSchema.index({ type: 1 });
DatasetSchema.index({ type: 1 }); // Admin count
DatasetSchema.index({ deleteTime: 1 }); // 添加软删除字段索引
} catch (error) {
console.log(error);

View File

@ -9,7 +9,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { type DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorDB/controller';
import { getVectorCount } from '@fastgpt/service/common/vectorDB/controller';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { getS3DatasetSource } from '@fastgpt/service/common/s3/sources/dataset';
@ -38,7 +38,11 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
const [file, indexAmount, errorCount] = await Promise.all([
fileId ? getS3DatasetSource().getFileMetadata(fileId) : undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id),
getVectorCount({
teamId: collection.teamId,
datasetId: collection.datasetId,
collectionId: collection._id
}),
MongoDatasetTraining.countDocuments(
{
teamId: collection.teamId,

View File

@ -6,8 +6,7 @@ import {
mockVectorInit,
mockGetVectorDataByTime,
mockGetVectorCountByTeamId,
mockGetVectorCountByDatasetId,
mockGetVectorCountByCollectionId,
mockGetVectorCount,
resetVectorMocks
} from '@test/mocks/common/vector';
import { mockGetVectorsByText } from '@test/mocks/core/ai/embedding';
@ -18,8 +17,7 @@ import {
recallFromVectorStore,
getVectorDataByTime,
getVectorCountByTeamId,
getVectorCountByDatasetId,
getVectorCountByCollectionId,
getVectorCount,
insertDatasetDataVector,
deleteDatasetDataVector
} from '@fastgpt/service/common/vectorDB/controller';
@ -147,24 +145,18 @@ describe('VectorDB Controller', () => {
});
});
describe('getVectorCountByDatasetId', () => {
it('should call Vector.getVectorCountByDatasetId', async () => {
const result = await getVectorCountByDatasetId('team_1', 'dataset_1');
describe('getVectorCount', () => {
it('should call Vector.getVectorCount', async () => {
const result = await getVectorCount({ teamId: 'team_1', datasetId: 'dataset_1' });
expect(mockGetVectorCountByDatasetId).toHaveBeenCalledWith('team_1', 'dataset_1');
expect(mockGetVectorCount).toHaveBeenCalledWith({
teamId: 'team_1',
datasetId: 'dataset_1'
});
expect(result).toBe(50);
});
});
describe('getVectorCountByCollectionId', () => {
it('should call Vector.getVectorCountByCollectionId', async () => {
const result = await getVectorCountByCollectionId('team_1', 'dataset_1', 'col_1');
expect(mockGetVectorCountByCollectionId).toHaveBeenCalledWith('team_1', 'dataset_1', 'col_1');
expect(result).toBe(25);
});
});
describe('insertDatasetDataVector', () => {
const mockModel = {
model: 'text-embedding-ada-002',

View File

@ -26,9 +26,7 @@ export const mockGetVectorDataByTime = vi.fn().mockResolvedValue([
export const mockGetVectorCountByTeamId = vi.fn().mockResolvedValue(100);
export const mockGetVectorCountByDatasetId = vi.fn().mockResolvedValue(50);
export const mockGetVectorCountByCollectionId = vi.fn().mockResolvedValue(25);
export const mockGetVectorCount = vi.fn().mockResolvedValue(50);
const MockVectorCtrl = vi.fn().mockImplementation(() => ({
init: mockVectorInit,
@ -37,8 +35,7 @@ const MockVectorCtrl = vi.fn().mockImplementation(() => ({
embRecall: mockVectorEmbRecall,
getVectorDataByTime: mockGetVectorDataByTime,
getVectorCountByTeamId: mockGetVectorCountByTeamId,
getVectorCountByDatasetId: mockGetVectorCountByDatasetId,
getVectorCountByCollectionId: mockGetVectorCountByCollectionId
getVectorCount: mockGetVectorCount
}));
// Mock PgVectorCtrl
@ -74,6 +71,5 @@ export const resetVectorMocks = () => {
mockVectorInit.mockClear();
mockGetVectorDataByTime.mockClear();
mockGetVectorCountByTeamId.mockClear();
mockGetVectorCountByDatasetId.mockClear();
mockGetVectorCountByCollectionId.mockClear();
mockGetVectorCount.mockClear();
};