mirror of
https://github.com/labring/FastGPT.git
synced 2025-12-26 04:32:50 +00:00
79 lines
1.9 KiB
TypeScript
79 lines
1.9 KiB
TypeScript
import type { NextApiRequest, NextApiResponse } from 'next';
|
||
import { jsonRes } from '@/service/response';
|
||
import { connectToDatabase, SplitData, Model } from '@/service/mongo';
|
||
import { authToken } from '@/service/utils/tools';
|
||
import { generateQA } from '@/service/events/generateQA';
|
||
import { encode } from 'gpt-token-utils';
|
||
|
||
/** Token count at which an accumulated piece is considered full and is emitted. */
const MIN_PIECE_TOKENS = 3000;
/** Token count that a single piece should stay below. */
const MAX_PIECE_TOKENS = 4000;

/**
 * Splits raw text into pieces of roughly 3k ~ 4k tokens, cutting only at sentence
 * boundaries (`!`, `?`, `.`, `。`).
 *
 * Newlines (real ones and literal `\n` sequences) are collapsed into single spaces
 * first. Token counts come from `encode(...).length`.
 *
 * @param text Raw text to split.
 * @returns The pieces in their original order. Never contains an empty string.
 */
function splitTextIntoPieces(text: string): string[] {
  const flattened = text.replace(/(\\n|\n)+/g, ' ');

  // The closing terminator is optional so that a trailing sentence without
  // punctuation is kept instead of being silently dropped. A whitespace-only
  // remainder (e.g. produced by a final newline) carries no content and is skipped.
  const sentences = (flattened.match(/[^!?.。]+[!?.。]?/g) ?? []).filter(
    (sentence) => sentence.trim() !== ''
  );

  const pieces: string[] = [];
  let pending = '';

  for (const sentence of sentences) {
    const tokens = encode(pending + sentence).length;

    if (tokens >= MAX_PIECE_TOKENS) {
      // Adding this sentence would overshoot: emit what has been collected so far
      // and start the next piece with this sentence. Nothing is emitted when
      // nothing was collected, so no empty piece is produced. A single sentence
      // that is itself over the limit is still emitted whole on a later step,
      // because the text is only ever cut at sentence boundaries.
      if (pending) {
        pieces.push(pending);
      }
      pending = sentence;
    } else if (tokens >= MIN_PIECE_TOKENS) {
      // Inside the target window: emit the collected text including this sentence.
      pieces.push(pending + sentence);
      pending = '';
    } else {
      // Still below the window: keep accumulating.
      pending += sentence;
    }
  }

  if (pending) {
    pieces.push(pending);
  }

  return pieces;
}

/**
 * API route: split the submitted text into pieces and queue it for QA generation.
 *
 * Expects `text`, `modelId` and `prompt` as non-empty strings in the request body.
 * The user id is obtained by passing the `authorization` header to `authToken`,
 * and a model with the given id must exist for that user. On success a `SplitData`
 * document is created, `generateQA` is invoked without being awaited, and
 * `jsonRes(res)` is called. Any thrown error is reported through `jsonRes` with
 * `code: 500`.
 *
 * @param req Incoming request; reads `body` and `headers.authorization`.
 * @param res Response object handed to `jsonRes`.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse): Promise<void> {
  try {
    // The body is untrusted input: check the runtime types instead of asserting
    // them, so a missing body or a non-string value (e.g. a query-operator object
    // passed as modelId) is rejected before it reaches the database filter.
    const { text, modelId, prompt } = (req.body ?? {}) as {
      text?: unknown;
      modelId?: unknown;
      prompt?: unknown;
    };

    if (
      typeof text !== 'string' ||
      typeof modelId !== 'string' ||
      typeof prompt !== 'string' ||
      !text ||
      !modelId ||
      !prompt
    ) {
      throw new Error('参数错误');
    }

    await connectToDatabase();

    const { authorization } = req.headers;
    const userId = await authToken(authorization);

    // Make sure the model belongs to the requesting user.
    const model = await Model.findOne({
      _id: modelId,
      userId
    });

    if (!model) {
      throw new Error('无权操作该模型');
    }

    const textList = splitTextIntoPieces(text);

    // Store the raw text together with its pieces.
    await SplitData.create({
      userId,
      modelId,
      rawText: text,
      textList,
      prompt
    });

    // Not awaited: the response is sent without waiting for generation.
    generateQA();

    jsonRes(res);
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|