mirror of
https://github.com/labring/FastGPT.git
synced 2025-12-25 20:02:47 +00:00
perf: remove dataset code (#6132)
Some checks are pending
Document deploy / sync-images (push) Waiting to run
Document deploy / generate-timestamp (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.cn suffix:cn]) (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.io suffix:io]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.cn kube_config:KUBE_CONFIG_CN suffix:cn]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.io kube_config:KUBE_CONFIG_IO suffix:io]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / get-vars (push) Waiting to run
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:amd64 runs-on:ubuntu-24.04]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / release-fastgpt-images (push) Blocked by required conditions
Some checks are pending
Document deploy / sync-images (push) Waiting to run
Document deploy / generate-timestamp (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.cn suffix:cn]) (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.io suffix:io]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.cn kube_config:KUBE_CONFIG_CN suffix:cn]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.io kube_config:KUBE_CONFIG_IO suffix:io]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / get-vars (push) Waiting to run
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:amd64 runs-on:ubuntu-24.04]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / release-fastgpt-images (push) Blocked by required conditions
* stop design doc * perf: init worker * perf: remove dataset cide * remove invalid doc
This commit is contained in:
parent
2fea73bb68
commit
b0a48603f8
|
|
@ -31,10 +31,10 @@ export const addAppDeleteJob = (data: AppDeleteJobData) => {
|
|||
}
|
||||
});
|
||||
|
||||
const jobId = `${data.teamId}:${data.appId}`;
|
||||
const jobId = `${String(data.teamId)}:${String(data.appId)}`;
|
||||
|
||||
// Use jobId to automatically prevent duplicate deletion tasks (BullMQ feature)
|
||||
return appDeleteQueue.add('deleteapp', data, {
|
||||
return appDeleteQueue.add('delete_app', data, {
|
||||
jobId,
|
||||
delay: 1000 // Delay 1 second to ensure API response completes
|
||||
});
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ export async function delDatasetRelevantData({
|
|||
datasets,
|
||||
session
|
||||
}: {
|
||||
datasets: DatasetSchemaType[];
|
||||
datasets: { _id: string; teamId: string }[];
|
||||
session: ClientSession;
|
||||
}) {
|
||||
if (!datasets.length) return;
|
||||
|
|
@ -115,24 +115,6 @@ export async function delDatasetRelevantData({
|
|||
// Delete vector data
|
||||
await deleteDatasetDataVector({ teamId, datasetIds });
|
||||
|
||||
for (const datasetId of datasetIds) {
|
||||
// Delete dataset_data_texts in batches by datasetId
|
||||
await MongoDatasetDataText.deleteMany({
|
||||
teamId,
|
||||
datasetId
|
||||
}).maxTimeMS(300000); // Reduce timeout for single batch
|
||||
// Delete dataset_datas in batches by datasetId
|
||||
await MongoDatasetData.deleteMany({
|
||||
teamId,
|
||||
datasetId
|
||||
}).maxTimeMS(300000);
|
||||
}
|
||||
|
||||
// Delete source: 兼容旧版的图片
|
||||
await delCollectionRelatedSource({ collections });
|
||||
// Delete vector data
|
||||
await deleteDatasetDataVector({ teamId, datasetIds });
|
||||
|
||||
// delete collections
|
||||
await MongoDatasetCollection.deleteMany({
|
||||
teamId,
|
||||
|
|
|
|||
|
|
@ -31,11 +31,11 @@ export const addDatasetDeleteJob = (data: DatasetDeleteJobData) => {
|
|||
}
|
||||
});
|
||||
|
||||
const jobId = `${data.teamId}:${data.datasetId}`;
|
||||
const jobId = `${String(data.teamId)}:${String(data.datasetId)}`;
|
||||
|
||||
// 使用去重机制,避免重复删除
|
||||
return datasetDeleteQueue.add(jobId, data, {
|
||||
deduplication: { id: jobId },
|
||||
return datasetDeleteQueue.add('delete_dataset', data, {
|
||||
jobId,
|
||||
delay: 1000 // 延迟1秒执行,确保API响应完成
|
||||
});
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
import type { Processor } from 'bullmq';
|
||||
import type { DatasetDeleteJobData } from './index';
|
||||
import { addDatasetDeleteJob, type DatasetDeleteJobData } from './index';
|
||||
import { delDatasetRelevantData, findDatasetAndAllChildren } from '../controller';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetCollectionTags } from '../tag/schema';
|
||||
import { removeDatasetSyncJobScheduler } from '../datasetSync';
|
||||
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
||||
|
|
@ -12,36 +11,53 @@ import { MongoDatasetTraining } from '../training/schema';
|
|||
|
||||
export const deleteDatasetsImmediate = async ({
|
||||
teamId,
|
||||
datasets
|
||||
datasetIds
|
||||
}: {
|
||||
teamId: string;
|
||||
datasets: DatasetSchemaType[];
|
||||
datasetIds: string[];
|
||||
}) => {
|
||||
const datasetIds = datasets.map((d) => d._id);
|
||||
|
||||
// delete training data
|
||||
MongoDatasetTraining.deleteMany({
|
||||
await MongoDatasetTraining.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
});
|
||||
|
||||
// Remove cron job
|
||||
await Promise.all(
|
||||
datasetIds.map((id) => {
|
||||
return removeDatasetSyncJobScheduler(id);
|
||||
})
|
||||
);
|
||||
};
|
||||
// Clear a team datasets
|
||||
export const deleteTeamAllDatasets = async (teamId: string) => {
|
||||
const datasets = await MongoDataset.find(
|
||||
{
|
||||
teamId
|
||||
},
|
||||
{ _id: 1, parentId: 1 }
|
||||
);
|
||||
await deleteDatasetsImmediate({
|
||||
teamId,
|
||||
datasetIds: datasets.map((d) => d._id)
|
||||
});
|
||||
await Promise.all(
|
||||
datasets.map((dataset) => {
|
||||
// 只处理已标记删除的数据集
|
||||
if (datasetIds.includes(dataset._id)) {
|
||||
return removeDatasetSyncJobScheduler(dataset._id);
|
||||
}
|
||||
if (dataset.parentId) return;
|
||||
return addDatasetDeleteJob({
|
||||
teamId,
|
||||
datasetId: dataset._id
|
||||
});
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
export const deleteDatasets = async ({
|
||||
const deleteDatasets = async ({
|
||||
teamId,
|
||||
datasets
|
||||
}: {
|
||||
teamId: string;
|
||||
datasets: DatasetSchemaType[];
|
||||
datasets: { _id: string; avatar: string; teamId: string }[];
|
||||
}) => {
|
||||
const datasetIds = datasets.map((d) => d._id);
|
||||
|
||||
|
|
@ -81,7 +97,8 @@ export const datasetDeleteProcessor: Processor<DatasetDeleteJobData> = async (jo
|
|||
// 1. 查找知识库及其所有子知识库
|
||||
const datasets = await findDatasetAndAllChildren({
|
||||
teamId,
|
||||
datasetId
|
||||
datasetId,
|
||||
fields: '_id teamId avatar'
|
||||
});
|
||||
|
||||
if (!datasets || datasets.length === 0) {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ const PromotionRecordSchema = new Schema({
|
|||
}
|
||||
});
|
||||
|
||||
PromotionRecordSchema.index({ userId: 1 });
|
||||
|
||||
export const MongoPromotionRecord = getMongoModel<PromotionRecordType>(
|
||||
'promotionRecord',
|
||||
PromotionRecordSchema
|
||||
|
|
|
|||
|
|
@ -32,14 +32,16 @@ async function handler(req: NextApiRequest) {
|
|||
|
||||
const deleteDatasets = await findDatasetAndAllChildren({
|
||||
teamId,
|
||||
datasetId
|
||||
datasetId,
|
||||
fields: '_id'
|
||||
});
|
||||
const datasetIds = deleteDatasets.map((d) => d._id);
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 1. Mark as deleted
|
||||
await MongoDataset.updateMany(
|
||||
{
|
||||
_id: deleteDatasets.map((d) => d._id),
|
||||
_id: datasetIds,
|
||||
teamId
|
||||
},
|
||||
{
|
||||
|
|
@ -52,7 +54,7 @@ async function handler(req: NextApiRequest) {
|
|||
|
||||
await deleteDatasetsImmediate({
|
||||
teamId,
|
||||
datasets: deleteDatasets
|
||||
datasetIds
|
||||
});
|
||||
|
||||
// 2. Add to delete queue
|
||||
|
|
|
|||
|
|
@ -5,7 +5,5 @@ import { initAppDeleteWorker } from '@fastgpt/service/core/app/delete';
|
|||
|
||||
export const initBullMQWorkers = () => {
|
||||
addLog.info('Init BullMQ Workers...');
|
||||
initS3MQWorker();
|
||||
initDatasetDeleteWorker();
|
||||
initAppDeleteWorker();
|
||||
return Promise.all([initS3MQWorker(), initDatasetDeleteWorker(), initAppDeleteWorker()]);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ describe('App Delete Queue', () => {
|
|||
}
|
||||
});
|
||||
|
||||
expect(mockQueue.add).toHaveBeenCalledWith('deleteapp', jobData, {
|
||||
expect(mockQueue.add).toHaveBeenCalledWith('delete_app', jobData, {
|
||||
jobId: 'team-123:app-123',
|
||||
delay: 1000
|
||||
});
|
||||
|
|
@ -106,7 +106,7 @@ describe('App Delete Queue', () => {
|
|||
await addAppDeleteJob(jobData);
|
||||
|
||||
expect(mockQueue.add).toHaveBeenCalledWith(
|
||||
'deleteapp',
|
||||
'delete_app',
|
||||
jobData,
|
||||
expect.objectContaining({
|
||||
jobId: 'team-xyz:app-abc'
|
||||
|
|
@ -155,7 +155,7 @@ describe('App Delete API Integration', () => {
|
|||
|
||||
// Verify queue job was added
|
||||
expect(mockQueue.add).toHaveBeenCalledWith(
|
||||
'deleteapp',
|
||||
'delete_app',
|
||||
{
|
||||
teamId: rootUser.teamId,
|
||||
appId: String(testApp._id)
|
||||
|
|
|
|||
Loading…
Reference in New Issue