mirror of
https://github.com/labring/FastGPT.git
synced 2025-12-25 20:02:47 +00:00
* add logs chart (#5352) * charts * chart data * log chart * delete * rename api * fix * move api * fix * fix * pro config * fix * feat: Repository interaction (#5356) * feat: 1好像功能没问题了,明天再测 * feat: 2 解决了昨天遗留的bug,但全选按钮又bug了 * feat: 3 第三版,解决了全选功能bug * feat: 4 第四版,下面改小细节 * feat: 5 我勒个痘 * feat: 6 * feat: 6 pr * feat: 7 * feat: 8 * feat: 9 * feat: 10 * feat: 11 * feat: 12 * perf: checkbox ui * refactor: tweak login loyout (#5357) Co-authored-by: Archer <545436317@qq.com> * login ui * app chat log chart pro display (#5392) * app chat log chart pro display * add canopen props * perf: pro tag tip * perf: pro tag tip * feat: openrouter provider (#5406) * perf: login ui * feat: openrouter provider * provider * perf: custom error throw * perf: emb batch (#5407) * perf: emb batch * perf: vector retry * doc * doc (#5411) * doc * fix: team folder will add to workflow * fix: generateToc shell * Tool price (#5376) * resolve conflicts for cherry-pick * fix i18n * Enhance system plugin template data structure and update ToolSelectModal to include CostTooltip component * refactor: update systemKeyCost type to support array of objects in plugin and workflow types * refactor: simplify systemKeyCost type across plugin and workflow types to a single number * refactor: streamline systemKeyCost handling in plugin and workflow components * fix * fix * perf: toolset price config;fix: workflow array selector ui (#5419) * fix: workflow array selector ui * update default model tip * perf: toolset price config * doc * fix: test * Refactor/chat (#5418) * refactor: add homepage configuration; add home chat page; add side bar animated collapse and layout * fix: fix lint rules * chore: improve logics and code * chore: more clearer logics * chore: adjust api --------- Co-authored-by: Archer <545436317@qq.com> * perf: chat setting code * del history * logo image * perf: home chat ui * feat: enhance chat response handling with external links and user info (#5427) * feat: enhance chat response handling 
with external links and user info * fix * cite code * perf: toolset add in workflow * fix: test * fix: search paraentId * Fix/chat (#5434) * wip: rebase了upstream * wip: adapt mobile UI * fix: fix chat page logic and UI * fix: fix UI and improve some logics * fix: model selector missing logo; vision model to retrieve file * perf: role selector * fix: chat ui * optimize export app chat log (#5436) * doc * chore: move components to proper directory; fix the api to get app list (#5437) * chore: improve team app panel display form (#5438) * feat: add home chat log tab * chore: improve team app panel display form * chore: improve log panel * fix: spec * doc * fix: log permission * fix: dataset schema required * add loading status * remove ui weight * manage log * fix: log detail per * doc * fix: log menu * rename permission * bg color * fix: app log per * fix: log key selector * fix: log * doc --------- Co-authored-by: heheer <zhiyu44@qq.com> Co-authored-by: colnii <1286949794@qq.com> Co-authored-by: 伍闲犬 <76519998+xqvvu@users.noreply.github.com> Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com> Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com> Co-authored-by: heheer <heheer@sealos.io>
192 lines
4.8 KiB
TypeScript
192 lines
4.8 KiB
TypeScript
import { MongoDatasetTraining } from './schema';
|
|
import type {
|
|
PushDatasetDataChunkProps,
|
|
PushDatasetDataResponse
|
|
} from '@fastgpt/global/core/dataset/api.d';
|
|
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
|
import { simpleText } from '@fastgpt/global/common/string/tools';
|
|
import { type ClientSession } from '../../../common/mongo';
|
|
import { getLLMModel, getEmbeddingModel, getVlmModel } from '../../ai/model';
|
|
import { addLog } from '../../../common/system/log';
|
|
import { getCollectionWithDataset } from '../controller';
|
|
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
|
import { type PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type';
|
|
import { i18nT } from '../../../../web/i18n/utils';
|
|
import { getLLMMaxChunkSize } from '../../../../global/core/dataset/training/utils';
|
|
|
|
/**
 * Push the `lockTime` of every training record owned by a team far into the future
 * (year 2999) — presumably so queue consumers treat those records as locked; confirm
 * against the code that reads `lockTime`.
 *
 * This is best-effort: a failure of the update never rejects the returned promise,
 * but it is logged so that a failed lock does not go unnoticed.
 *
 * @param teamId - Id of the team whose training records are updated.
 * @returns A promise that always resolves with `undefined`.
 */
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
  try {
    await MongoDatasetTraining.updateMany(
      {
        teamId
      },
      {
        lockTime: new Date('2999/5/5')
      }
    );
  } catch (error) {
    // Keep the original non-throwing contract, but leave a trace of the failure.
    addLog.error(`Lock training data error, teamId: ${teamId}`, error);
  }
};
|
|
|
|
/**
 * Filter a list of dataset items and insert the remaining ones into the training queue.
 *
 * Steps:
 * 1. Resolve the embedding and LLM models (and the VLM model for image modes).
 * 2. Derive the maximum text length and the queue weight from the training mode.
 * 3. Drop items that have neither an image nor a question, or whose `q + a` text is
 *    longer than the mode's limit.
 * 4. Insert the remaining items in batches of 500, using the caller's session when
 *    provided and otherwise a session supplied by `mongoSessionRun`.
 *
 * @returns `{ insertLen }` — the number of items that passed the filter.
 * Rejects with an i18n key string when a required model is not configured, with a
 * message string when the mode is not supported, and with the insert error (or a
 * message string on an inserted-count mismatch) when a batch fails.
 */
export async function pushDataListToTrainingQueue({
  teamId,
  tmbId,
  datasetId,
  collectionId,
  agentModel,
  vectorModel,
  vlmModel,
  data,
  billId,
  mode = TrainingModeEnum.chunk,
  indexSize,
  session
}: PushDataToTrainingQueueProps): Promise<PushDatasetDataResponse> {
  const vectorModelData = getEmbeddingModel(vectorModel);
  if (!vectorModelData) {
    return Promise.reject(i18nT('common:error_embedding_not_config'));
  }

  const agentModelData = getLLMModel(agentModel);
  if (!agentModelData) {
    return Promise.reject(i18nT('common:error_llm_not_config'));
  }

  // Per-mode settings: which model is used, the text length limit and the queue weight.
  const { model, maxToken, weight } = await (async () => {
    switch (mode) {
      case TrainingModeEnum.chunk:
        return {
          maxToken: Infinity,
          model: vectorModelData.model,
          weight: vectorModelData.weight
        };
      case TrainingModeEnum.qa:
      case TrainingModeEnum.auto:
        return {
          maxToken: getLLMMaxChunkSize(agentModelData),
          model: agentModelData.model,
          weight: 0
        };
      case TrainingModeEnum.image:
      case TrainingModeEnum.imageParse: {
        const vlmModelData = getVlmModel(vlmModel);
        if (!vlmModelData) {
          return Promise.reject(i18nT('common:error_vlm_not_config'));
        }
        return {
          maxToken: getLLMMaxChunkSize(vlmModelData),
          model: vlmModelData.model,
          weight: 0
        };
      }
      default:
        return Promise.reject(`Training mode "${mode}" is inValid`);
    }
  })();

  // Keep only items that carry content (an image or a question) and fit the length limit.
  const queueItems = data.filter((item) => {
    const question = item.q || '';
    const answer = item.a || '';

    const isEmpty = !item.imageId && !question;
    const isOversized = (question + answer).length > maxToken;

    return !isEmpty && !isOversized;
  });

  // Use insertMany for batch inserts.
  const insertBatchSize = 500;

  // Insert one batch; reject when the driver fails or not every document was inserted.
  const insertBatch = async (batch: typeof queueItems, activeSession: ClientSession) => {
    try {
      const result = await MongoDatasetTraining.insertMany(
        batch.map((item) => ({
          teamId,
          tmbId,
          datasetId: datasetId,
          collectionId: collectionId,
          billId,
          mode,
          ...(item.q && { q: item.q }),
          ...(item.a && { a: item.a }),
          ...(item.imageId && { imageId: item.imageId }),
          chunkIndex: item.chunkIndex ?? 0,
          indexSize,
          weight: weight ?? 0,
          indexes: item.indexes,
          retryCount: 5
        })),
        {
          session: activeSession,
          ordered: false,
          rawResult: true,
          includeResultMetadata: false // further reduce the returned data
        }
      );

      if (result.insertedCount !== batch.length) {
        return Promise.reject(`Insert data error, ${JSON.stringify(result)}`);
      }
    } catch (error: any) {
      addLog.error(`Insert error`, error);
      return Promise.reject(error);
    }
  };

  // Walk the filtered list batch by batch, stopping at the first failing batch.
  const insertAllBatches = async (activeSession: ClientSession) => {
    for (let offset = 0; offset < queueItems.length; offset += insertBatchSize) {
      await insertBatch(queueItems.slice(offset, offset + insertBatchSize), activeSession);
    }
  };

  if (session) {
    await insertAllBatches(session);
  } else {
    await mongoSessionRun(async (newSession) => {
      await insertAllBatches(newSession);
    });
  }

  return {
    insertLen: queueItems.length
  };
}
|
|
|
|
/**
 * Add a single `parse`-mode task for a collection to the training queue.
 *
 * @param params.teamId - Team that owns the task.
 * @param params.tmbId - Team member id stored on the task.
 * @param params.datasetId - Dataset the collection belongs to.
 * @param params.collectionId - Collection to be parsed.
 * @param params.billId - Bill id stored on the task.
 * @param params.session - Mongo session the insert runs in.
 */
export const pushDatasetToParseQueue = async (params: {
  teamId: string;
  tmbId: string;
  datasetId: string;
  collectionId: string;
  billId: string;
  session: ClientSession;
}) => {
  const { teamId, tmbId, datasetId, collectionId, billId, session } = params;

  // The one queue record to create; only the mode distinguishes it as a parse task.
  const parseTask = {
    teamId,
    tmbId,
    datasetId,
    collectionId,
    billId,
    mode: TrainingModeEnum.parse
  };

  await MongoDatasetTraining.create([parseTask], { session, ordered: true });
};
|