Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker file
Archer 2024-04-01 19:01:26 +08:00 committed by GitHub
parent f9d266a6af
commit 21288d1736
90 changed files with 2707 additions and 1678 deletions

@ -22,7 +22,7 @@ weight: 356
## How do tools run
To understand how a tool is permitted, you first need to know the conditions under which it runs.
To understand how a tool runs, you first need to know the conditions under which it runs.
1. The tool needs an introduction, or description. It tells the LLM what the tool does, and the LLM decides from the conversation context whether the tool should be called.
2. The tool's parameters. Some tools may need special parameters when they are called. Each parameter has two key fields: `parameter description` and `required` (see the sketch below).
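
As a minimal sketch, assuming an OpenAI-style function/tool definition (the tool name, fields and values below are illustrative only, not FastGPT's internal schema), the two pieces above map onto a structure like this:

```ts
// Hypothetical tool definition: a description for the LLM plus parameters,
// each with its own description and a "required" list.
const weatherTool = {
  type: 'function' as const,
  function: {
    name: 'getWeather',
    // The description tells the LLM what the tool does, so it can decide from
    // the conversation context whether this tool should be called.
    description: 'Query the current weather for a given city',
    parameters: {
      type: 'object',
      properties: {
        // Parameter description
        city: { type: 'string', description: 'City name, e.g. "Hangzhou"' }
      },
      // Which parameters are required
      required: ['city']
    }
  }
};
```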

@ -3,12 +3,17 @@ import { ErrType } from '../errorCode';
/* dataset: 507000 */
const startCode = 507000;
export enum CommonErrEnum {
fileNotFound = 'fileNotFound'
fileNotFound = 'fileNotFound',
unAuthFile = 'unAuthFile'
}
const datasetErr = [
{
statusText: CommonErrEnum.fileNotFound,
message: 'error.fileNotFound'
},
{
statusText: CommonErrEnum.unAuthFile,
message: 'error.unAuthFile'
}
];
export default datasetErr.reduce((acc, cur, index) => {

@ -40,9 +40,9 @@ export const splitText2Chunks = (props: {
{ reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
{ reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // (?![\*\-|>`0-9]): markdown special char
{ reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // Enlarge the chunk and try to keep it a complete paragraph. (?![\*\-|>`0-9]): markdown special char
{ reg: /([\n])/g, maxLen: chunkLen * 1.2 },
// ------ There's no overlap on the top
{ reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.2 },
{ reg: /([！]|!\s)/g, maxLen: chunkLen * 1.2 },
{ reg: /([？]|\?\s)/g, maxLen: chunkLen * 1.4 },
@ -56,7 +56,7 @@ export const splitText2Chunks = (props: {
const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen;
const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;
// if use markdown title split, Separate record title title
// if use markdown title split, Separate record title
const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
if (step >= stepReges.length) {
return [
@ -97,6 +97,7 @@ export const splitText2Chunks = (props: {
.filter((item) => item.text.trim());
};
/* Gets the overlap at the end of a text as the beginning of the next block */
const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => {
const forbidOverlap = checkForbidOverlap(step);
const maxOverlapLen = chunkLen * 0.4;
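
To make the overlap rule concrete, here is a simplified standalone sketch (not the real getOneTextOverlapText internals, which also respect the forbid-overlap steps and custom separators): the tail of the previous chunk, capped at roughly 40% of chunkLen and preferably starting after a sentence boundary, becomes the head of the next chunk.

```ts
// Simplified illustration of the chunk-overlap idea, assuming plain string input;
// this is not the actual getOneTextOverlapText implementation.
const getTailOverlap = (prevChunk: string, chunkLen: number): string => {
  const maxOverlapLen = Math.floor(chunkLen * 0.4);
  const tail = prevChunk.slice(-maxOverlapLen);
  // Prefer to start the overlap right after a sentence boundary inside the tail.
  const boundary = tail.search(/[。！？.!?]/);
  return boundary >= 0 ? tail.slice(boundary + 1).trimStart() : tail;
};

// The next chunk would then start with this overlap, followed by its own text.
const overlap = getTailOverlap(
  'A first sentence that is fairly long ends here. Carried into the next chunk',
  100
); // => 'Carried into the next chunk'
```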

@ -55,6 +55,7 @@ export type FastGPTFeConfigsType = {
customApiDomain?: string;
customSharePageDomain?: string;
uploadFileMaxAmount?: number;
uploadFileMaxSize?: number;
};

@ -44,14 +44,18 @@ export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams
export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
link: string;
};
export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
fileId: string;
};
export type FileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
name: string;
rawTextLength: number;
hashRawText: string;
fileMetadata?: Record<string, any>;
collectionMetadata?: Record<string, any>;
};
export type CsvTableCreateDatasetCollectionParams = {
datasetId: string;
parentId?: string;
fileId: string;
};
/* ================= data ===================== */
export type PgSearchRawType = {

@ -73,6 +73,13 @@ export const DatasetCollectionSyncResultMap = {
/* ------------ data -------------- */
/* ------------ training -------------- */
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
csvTable = 'csvTable'
}
export enum TrainingModeEnum {
chunk = 'chunk',
auto = 'auto',

@ -2,18 +2,18 @@
"name": "@fastgpt/global",
"version": "1.0.0",
"dependencies": {
"@apidevtools/swagger-parser": "^10.1.0",
"axios": "^1.5.1",
"dayjs": "^1.11.7",
"encoding": "^0.1.13",
"js-tiktoken": "^1.0.7",
"openapi-types": "^12.1.3",
"openai": "4.28.0",
"nanoid": "^4.0.1",
"js-yaml": "^4.1.0",
"timezones-list": "^3.0.2",
"next": "13.5.2",
"jschardet": "3.1.1",
"@apidevtools/swagger-parser": "^10.1.0"
"nanoid": "^4.0.1",
"next": "13.5.2",
"openai": "4.28.0",
"openapi-types": "^12.1.3",
"timezones-list": "^3.0.2"
},
"devDependencies": {
"@types/js-yaml": "^4.0.9",

@ -0,0 +1,33 @@
import { connectionMongo, type Model } from '../../mongo';
const { Schema, model, models } = connectionMongo;
import { RawTextBufferSchemaType } from './type';
export const collectionName = 'buffer.rawText';
const RawTextBufferSchema = new Schema({
sourceId: {
type: String,
required: true
},
rawText: {
type: String,
default: ''
},
createTime: {
type: Date,
default: () => new Date()
},
metadata: Object
});
try {
RawTextBufferSchema.index({ sourceId: 1 });
// 20 minutes
RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
} catch (error) {
console.log(error);
}
export const MongoRwaTextBuffer: Model<RawTextBufferSchemaType> =
models[collectionName] || model(collectionName, RawTextBufferSchema);
MongoRwaTextBuffer.syncIndexes();

@ -0,0 +1,8 @@
export type RawTextBufferSchemaType = {
sourceId: string;
rawText: string;
createTime: Date;
metadata?: {
filename: string;
};
};

@ -2,7 +2,7 @@ import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { TTSBufferSchemaType } from './type.d';
export const collectionName = 'ttsbuffers';
export const collectionName = 'buffer.tts';
const TTSBufferSchema = new Schema({
bufferId: {

@ -4,6 +4,18 @@ import fsp from 'fs/promises';
import fs from 'fs';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readFileRawText } from '../read/rawText';
import { ReadFileByBufferParams } from '../read/type';
import { readMarkdown } from '../read/markdown';
import { readHtmlRawText } from '../read/html';
import { readPdfFile } from '../read/pdf';
import { readWordFile } from '../read/word';
import { readCsvRawText } from '../read/csv';
import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
import { readPptxRawText } from '../read/pptx';
import { readXlsxRawText } from '../read/xlsx';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoFileSchema;
@ -111,3 +123,139 @@ export async function getDownloadStream({
return bucket.openDownloadStream(new Types.ObjectId(fileId));
}
export const readFileEncode = async ({
bucketName,
fileId
}: {
bucketName: `${BucketNameEnum}`;
fileId: string;
}) => {
const encodeStream = await getDownloadStream({ bucketName, fileId });
let buffers: Buffer = Buffer.from([]);
for await (const chunk of encodeStream) {
buffers = Buffer.concat([buffers, chunk]);
if (buffers.length > 10) {
encodeStream.abort();
break;
}
}
const encoding = detectFileEncoding(buffers);
return encoding as BufferEncoding;
};
export const readFileContent = async ({
teamId,
bucketName,
fileId,
csvFormat = false
}: {
teamId: string;
bucketName: `${BucketNameEnum}`;
fileId: string;
csvFormat?: boolean;
}): Promise<{
rawText: string;
filename: string;
}> => {
// read buffer
const fileBuffer = await MongoRwaTextBuffer.findOne({ sourceId: fileId }).lean();
if (fileBuffer) {
return {
rawText: fileBuffer.rawText,
filename: fileBuffer.metadata?.filename || ''
};
}
const [file, encoding, fileStream] = await Promise.all([
getFileById({ bucketName, fileId }),
readFileEncode({ bucketName, fileId }),
getDownloadStream({ bucketName, fileId })
]);
if (!file) {
return Promise.reject(CommonErrEnum.fileNotFound);
}
const extension = file?.filename?.split('.')?.pop()?.toLowerCase() || '';
const fileBuffers = await (() => {
return new Promise<Buffer>((resolve, reject) => {
let buffers = Buffer.from([]);
fileStream.on('data', (chunk) => {
buffers = Buffer.concat([buffers, chunk]);
});
fileStream.on('end', () => {
resolve(buffers);
});
fileStream.on('error', (err) => {
reject(err);
});
});
})();
const params: ReadFileByBufferParams = {
teamId,
buffer: fileBuffers,
encoding,
metadata: {
relatedId: fileId
}
};
const { rawText } = await (async () => {
switch (extension) {
case 'txt':
return readFileRawText(params);
case 'md':
return readMarkdown(params);
case 'html':
return readHtmlRawText(params);
case 'pdf':
return readPdfFile(params);
case 'docx':
return readWordFile(params);
case 'pptx':
return readPptxRawText(params);
case 'xlsx':
const xlsxResult = await readXlsxRawText(params);
if (csvFormat) {
return {
rawText: xlsxResult.formatText || ''
};
}
return {
rawText: xlsxResult.rawText
};
case 'csv':
const csvResult = await readCsvRawText(params);
if (csvFormat) {
return {
rawText: csvResult.formatText || ''
};
}
return {
rawText: csvResult.rawText
};
default:
return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
}
})();
if (rawText.trim()) {
await MongoRwaTextBuffer.create({
sourceId: fileId,
rawText,
metadata: {
filename: file.filename
}
});
}
return {
rawText,
filename: file.filename
};
};
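
A minimal usage sketch for the new reader, assuming the controller is exposed from the service package (the import path is an assumption): it checks the 20-minute raw-text buffer first, otherwise downloads the GridFS file, detects the encoding, dispatches on the file extension, and caches the result.

```ts
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
// Path assumed; this is the gridfs controller shown above.
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';

export async function loadDatasetFile(teamId: string, fileId: string) {
  const { rawText, filename } = await readFileContent({
    teamId,
    bucketName: BucketNameEnum.dataset,
    // csvFormat: true makes csv/xlsx return the "header:value" formatText
    // instead of the raw comma-separated text.
    csvFormat: true,
    fileId
  });
  return { rawText, filename };
}
```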

@ -14,7 +14,6 @@ export async function uploadMongoImg({
teamId,
expiredTime,
metadata,
shareId
}: UploadImgProps & {
teamId: string;
@ -30,9 +29,8 @@ export async function uploadMongoImg({
type,
teamId,
binary,
expiredTime: expiredTime,
expiredTime,
metadata,
shareId
});

@ -25,13 +25,13 @@ const ImageSchema = new Schema({
enum: Object.keys(mongoImageTypeMap),
required: true
},
metadata: {
type: Object
}
});
try {
// tts expired
ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
ImageSchema.index({ type: 1 });
ImageSchema.index({ createTime: 1 });

@ -0,0 +1,21 @@
import Papa from 'papaparse';
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { readFileRawText } from './rawText';
// Load the source file content
export const readCsvRawText = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const { rawText } = readFileRawText(params);
const csvArr = Papa.parse(rawText).data as string[][];
const header = csvArr[0];
const formatText = header
? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
: '';
return {
rawText,
formatText
};
};
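
For reference, a small self-contained example of the transformation above (values invented): every row, including the header row itself, is re-emitted as `header:value` lines.

```ts
import Papa from 'papaparse';

const rawText = 'name,price\napple,3\nbanana,2';
const csvArr = Papa.parse(rawText).data as string[][];
const header = csvArr[0];
const formatText = csvArr
  .map((row) => row.map((value, i) => `${header[i]}:${value}`).join('\n'))
  .join('\n');
// formatText:
// name:name
// price:price
// name:apple
// price:3
// name:banana
// price:2
```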

@ -0,0 +1,23 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { initMarkdownText } from './utils';
import { htmlToMarkdown } from '../../string/markdown';
import { readFileRawText } from './rawText';
export const readHtmlRawText = async (
params: ReadFileByBufferParams
): Promise<ReadFileResponse> => {
const { teamId, metadata } = params;
const { rawText: html } = readFileRawText(params);
const md = await htmlToMarkdown(html);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText
};
};

@ -0,0 +1,18 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { initMarkdownText } from './utils';
import { readFileRawText } from './rawText';
export const readMarkdown = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const { teamId, metadata } = params;
const { rawText: md } = readFileRawText(params);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText
};
};

@ -0,0 +1,119 @@
import { getNanoid } from '@fastgpt/global/common/string/tools';
import fs from 'fs';
import decompress from 'decompress';
import { DOMParser } from '@xmldom/xmldom';
import { clearDirFiles } from '../utils';
import { addLog } from '../../system/log';
const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';
function getNewFileName(ext: string) {
return `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}.${ext}`;
}
const parseString = (xml: string) => {
let parser = new DOMParser();
return parser.parseFromString(xml, 'text/xml');
};
const parsePowerPoint = async ({
filepath,
decompressPath,
encoding
}: {
filepath: string;
decompressPath: string;
encoding: BufferEncoding;
}) => {
// Files regex that hold our content of interest
const allFilesRegex = /ppt\/(notesSlides|slides)\/(notesSlide|slide)\d+.xml/g;
const slidesRegex = /ppt\/slides\/slide\d+.xml/g;
/** The decompress location which contains the filename in it */
const files = await decompress(filepath, decompressPath, {
filter: (x) => !!x.path.match(allFilesRegex)
});
// Verify that at least the slide xml files exist in the extracted files list.
if (
files.length == 0 ||
!files.map((file) => file.path).some((filename) => filename.match(slidesRegex))
) {
return Promise.reject('解析 PPT 失败');
}
// Returning an array of all the xml contents read using fs.readFileSync
const xmlContentArray = files.map((file) =>
fs.readFileSync(`${decompressPath}/${file.path}`, encoding)
);
let responseArr: string[] = [];
xmlContentArray.forEach((xmlContent) => {
/** Find text nodes with a:p tags */
const xmlParagraphNodesList = parseString(xmlContent).getElementsByTagName('a:p');
/** Store all the text content to respond */
responseArr.push(
Array.from(xmlParagraphNodesList)
// Filter out paragraph nodes that do not have any text nodes (identified by the a:t tag)
.filter((paragraphNode) => paragraphNode.getElementsByTagName('a:t').length != 0)
.map((paragraphNode) => {
/** Find text nodes with a:t tags */
const xmlTextNodeList = paragraphNode.getElementsByTagName('a:t');
return Array.from(xmlTextNodeList)
.filter((textNode) => textNode.childNodes[0] && textNode.childNodes[0].nodeValue)
.map((textNode) => textNode.childNodes[0].nodeValue)
.join('');
})
.join('\n')
);
});
return responseArr.join('\n');
};
export const parseOffice = async ({
buffer,
encoding,
extension
}: {
buffer: Buffer;
encoding: BufferEncoding;
extension: string;
}) => {
// Prepare file for processing
// create temp file subdirectory if it does not exist
if (!fs.existsSync(DEFAULTDECOMPRESSSUBLOCATION)) {
fs.mkdirSync(DEFAULTDECOMPRESSSUBLOCATION, { recursive: true });
}
// temp file name
const filepath = getNewFileName(extension);
const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}`;
// const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/test`;
// write new file
fs.writeFileSync(filepath, buffer, {
encoding
});
const text = await (async () => {
try {
switch (extension) {
case 'pptx':
return parsePowerPoint({ filepath, decompressPath, encoding });
default:
return Promise.reject('只能读取 .pptx 文件');
}
} catch (error) {
addLog.error(`Load ppt error`, { error });
}
return '';
})();
fs.unlinkSync(filepath);
clearDirFiles(decompressPath);
return text;
};
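
A minimal usage sketch, assuming parseOffice is imported from this module (relative path is an assumption): the buffer is written to a temp file under /tmp, the ppt/slides and ppt/notesSlides XML entries are unzipped, and the text of every `a:t` node is concatenated.

```ts
import fs from 'fs';
import { parseOffice } from './parseOffice'; // relative path assumed

export async function readPptxFromDisk(path: string) {
  const buffer = fs.readFileSync(path); // e.g. './deck.pptx' (illustrative)
  // encoding is used when writing the temp file and reading the extracted XML.
  return parseOffice({ buffer, encoding: 'utf-8', extension: 'pptx' });
}
```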

@ -1,5 +1,7 @@
/* read file to txt */
import * as pdfjsLib from 'pdfjs-dist';
import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
// @ts-ignore
import('pdfjs-dist/legacy/build/pdf.worker.min.mjs');
import { ReadFileByBufferParams, ReadFileResponse } from './type';
type TokenType = {
str: string;
@ -11,9 +13,9 @@ type TokenType = {
hasEOL: boolean;
};
export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';
export const readPdfFile = async ({
buffer
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const readPDFPage = async (doc: any, pageNo: number) => {
const page = await doc.getPage(pageNo);
const tokenizedText = await page.getTextContent();
@ -51,14 +53,19 @@ export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
.join('');
};
const doc = await pdfjsLib.getDocument(pdf).promise;
const loadingTask = pdfjs.getDocument(buffer.buffer);
const doc = await loadingTask.promise;
const pageTextPromises = [];
for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
pageTextPromises.push(readPDFPage(doc, pageNo));
}
const pageTexts = await Promise.all(pageTextPromises);
loadingTask.destroy();
return {
rawText: pageTexts.join('')
rawText: pageTexts.join(''),
metadata: {}
};
};
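
A minimal usage sketch, assuming the module is imported as in the gridfs controller above (relative path is an assumption). Note that the PDF reader only uses `buffer`; `encoding` is required by the shared params type but not used here.

```ts
import fs from 'fs';
import { readPdfFile } from './pdf'; // relative path assumed

export async function extractPdfText(path: string, teamId: string) {
  const buffer = fs.readFileSync(path); // e.g. './manual.pdf' (illustrative)
  const { rawText } = await readPdfFile({ teamId, buffer, encoding: 'utf-8' });
  return rawText;
}
```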

@ -0,0 +1,14 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
// import { parseOfficeAsync } from 'officeparser';
import { parseOffice } from './parseOffice';
export const readPptxRawText = async ({
buffer,
encoding
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const result = await parseOffice({ buffer, encoding, extension: 'pptx' });
return {
rawText: result
};
};

@ -0,0 +1,10 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
// Load the source file content
export const readFileRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
const content = buffer.toString(encoding);
return {
rawText: content
};
};

@ -0,0 +1,12 @@
export type ReadFileByBufferParams = {
teamId: string;
buffer: Buffer;
encoding: BufferEncoding;
metadata?: Record<string, any>;
};
export type ReadFileResponse = {
rawText: string;
formatText?: string;
metadata?: Record<string, any>;
};

@ -0,0 +1,25 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';
export const initMarkdownText = ({
teamId,
md,
metadata
}: {
md: string;
teamId: string;
metadata?: Record<string, any>;
}) =>
markdownProcess({
rawText: md,
uploadImgController: (base64Img) =>
uploadMongoImg({
type: MongoImageTypeEnum.collectionImage,
base64Img,
teamId,
metadata,
expiredTime: addHours(new Date(), 2)
})
});

@ -0,0 +1,35 @@
import mammoth from 'mammoth';
import { htmlToMarkdown } from '../../string/markdown';
import { ReadFileByBufferParams, ReadFileResponse } from './type';
import { initMarkdownText } from './utils';
/**
* read docx to markdown
*/
export const readWordFile = async ({
teamId,
buffer,
metadata = {}
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
try {
const { value: html } = await mammoth.convertToHtml({
buffer
});
const md = await htmlToMarkdown(html);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText,
metadata: {}
};
} catch (error) {
console.log('error doc read:', error);
return Promise.reject('Can not read doc file, please convert to PDF');
}
};

@ -0,0 +1,45 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import xlsx from 'node-xlsx';
import Papa from 'papaparse';
export const readXlsxRawText = async ({
buffer
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const result = xlsx.parse(buffer, {
skipHidden: false,
defval: ''
});
const format2Csv = result.map(({ name, data }) => {
return {
title: `#${name}`,
csvText: data.map((item) => item.join(',')).join('\n')
};
});
const rawText = format2Csv.map((item) => item.csvText).join('\n');
const formatText = format2Csv
.map((item) => {
const csvArr = Papa.parse(item.csvText).data as string[][];
const header = csvArr[0];
const formatText = header
? csvArr
.map((item) =>
item
.map((item, i) => (item ? `${header[i]}:${item}` : ''))
.filter(Boolean)
.join('\n')
)
.join('\n')
: '';
return `${item.title}\n${formatText}`;
})
.join('\n');
return {
rawText: rawText,
formatText
};
};
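
A worked example of the output shapes above (sheet name and cells invented): each sheet becomes a `#SheetName` block and, as with CSV, the header row is also re-emitted as `header:value` lines.

```ts
// For a workbook with a single sheet "Fruit" containing:
//   name  | price
//   apple | 3
//
// rawText:
//   name,price
//   apple,3
//
// formatText:
//   #Fruit
//   name:name
//   price:price
//   name:apple
//   price:3
```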

@ -35,13 +35,8 @@ export const clearDirFiles = (dirPath: string) => {
return;
}
fs.readdirSync(dirPath).forEach((file) => {
const curPath = `${dirPath}/${file}`;
if (fs.lstatSync(curPath).isDirectory()) {
clearDirFiles(curPath);
} else {
fs.unlinkSync(curPath);
}
fs.rmdirSync(dirPath, {
recursive: true
});
};

@ -9,7 +9,6 @@ import {
DatasetCollectionSchemaType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetTraining } from '../training/schema';
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetData } from '../data/schema';
import { delImgByRelatedId } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';

@ -0,0 +1,6 @@
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
}

@ -1,14 +1,16 @@
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetTraining } from './schema';
import type {
PushDatasetDataChunkProps,
PushDatasetDataProps,
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import { getCollectionWithDataset } from '../controller';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { ClientSession } from '../../../common/mongo';
import { getLLMModel, getVectorModel } from '../../ai/model';
import { addLog } from '../../../common/system/log';
import { getCollectionWithDataset } from '../controller';
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
try {
@ -23,31 +25,52 @@ export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> =>
} catch (error) {}
};
export async function pushDataListToTrainingQueue({
teamId,
tmbId,
export const pushDataListToTrainingQueueByCollectionId = async ({
collectionId,
data,
prompt,
billId,
trainingMode = TrainingModeEnum.chunk
...props
}: {
teamId: string;
tmbId: string;
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
const vectorModelList = global.vectorModels;
const datasetModelList = global.llmModels;
session?: ClientSession;
} & PushDatasetDataProps) => {
const {
datasetId: { _id: datasetId, vectorModel, agentModel }
datasetId: { _id: datasetId, agentModel, vectorModel }
} = await getCollectionWithDataset(collectionId);
return pushDataListToTrainingQueue({
...props,
datasetId,
collectionId,
agentModel,
vectorModel
});
};
export async function pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId,
collectionId,
agentModel,
vectorModel,
data,
prompt,
billId,
trainingMode = TrainingModeEnum.chunk,
session
}: {
teamId: string;
tmbId: string;
datasetId: string;
agentModel: string;
vectorModel: string;
session?: ClientSession;
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
const checkModelValid = async () => {
const agentModelData = datasetModelList?.find((item) => item.model === agentModel);
const agentModelData = getLLMModel(agentModel);
if (!agentModelData) {
return Promise.reject(`File model ${agentModel} is inValid`);
}
const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
const vectorModelData = getVectorModel(vectorModel);
if (!vectorModelData) {
return Promise.reject(`Vector model ${vectorModel} is inValid`);
}
@ -124,52 +147,43 @@ export async function pushDataListToTrainingQueue({
});
// insert data to db
const insertData = async (dataList: PushDatasetDataChunkProps[], retry = 3): Promise<number> => {
try {
const results = await MongoDatasetTraining.insertMany(
dataList.map((item, i) => ({
teamId,
tmbId,
datasetId,
collectionId,
billId,
mode: trainingMode,
prompt,
model,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex ?? 0,
weight: weight ?? 0,
indexes: item.indexes
}))
);
await delay(500);
return results.length;
} catch (error) {
if (retry > 0) {
await delay(500);
return insertData(dataList, retry - 1);
}
return Promise.reject(error);
}
};
const insertLen = filterResult.success.length;
const failedDocuments: PushDatasetDataChunkProps[] = [];
let insertLen = 0;
const chunkSize = 50;
const chunkList = filterResult.success.reduce(
(acc, cur) => {
const lastChunk = acc[acc.length - 1];
if (lastChunk.length < chunkSize) {
lastChunk.push(cur);
} else {
acc.push([cur]);
// Batch insert with insertMany
try {
await MongoDatasetTraining.insertMany(
filterResult.success.map((item) => ({
teamId,
tmbId,
datasetId,
collectionId,
billId,
mode: trainingMode,
prompt,
model,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex ?? 0,
weight: weight ?? 0,
indexes: item.indexes
})),
{
session
}
return acc;
},
[[]] as PushDatasetDataChunkProps[][]
);
for await (const chunks of chunkList) {
insertLen += await insertData(chunks);
);
} catch (error: any) {
addLog.error(`Insert error`, error);
// If any insert fails, add the failed documents to the failed list
error.writeErrors.forEach((writeError: any) => {
failedDocuments.push(data[writeError.index]);
});
console.log('failed', failedDocuments);
}
// Retry the failed documents one at a time
for await (const item of failedDocuments) {
await MongoDatasetTraining.create(item);
}
delete filterResult.success;
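
A hedged usage sketch of the collection-level wrapper introduced in this hunk; the field set is inferred from the code shown here (PushDatasetDataProps is not shown in full), and the import path is an assumption.

```ts
import type { ClientSession } from 'mongoose';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { pushDataListToTrainingQueueByCollectionId } from './controller'; // path assumed

// Enqueue text chunks of one collection for chunk-mode training.
export async function enqueueChunks(params: {
  teamId: string;
  tmbId: string;
  collectionId: string;
  billId: string;
  chunks: string[];
  session?: ClientSession; // optional, when called inside a transaction
}) {
  const { chunks, ...rest } = params;
  return pushDataListToTrainingQueueByCollectionId({
    ...rest,
    trainingMode: TrainingModeEnum.chunk,
    data: chunks.map((q, chunkIndex) => ({ q, a: '', chunkIndex }))
  });
}
```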

@ -2,6 +2,7 @@ import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { addLog } from '../../../common/system/log';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { MongoDatasetTraining } from './schema';
import Papa from 'papaparse';
export const checkInvalidChunkAndLock = async ({
err,
@ -39,3 +40,18 @@ export const checkInvalidChunkAndLock = async ({
}
return false;
};
export const parseCsvTable2Chunks = (rawText: string) => {
const csvArr = Papa.parse(rawText).data as string[][];
const chunks = csvArr
.map((item) => ({
q: item[0] || '',
a: item[1] || ''
}))
.filter((item) => item.q || item.a);
return {
chunks
};
};
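
A small example of the helper above (rows invented): the first CSV column becomes `q`, the second becomes `a`, fully empty rows are dropped, and a header row, if present, is kept as an ordinary chunk.

```ts
import { parseCsvTable2Chunks } from './utils'; // relative path assumed

const { chunks } = parseCsvTable2Chunks('q,a\nWhat is FastGPT?,A knowledge base QA platform\n,');
// chunks => [
//   { q: 'q', a: 'a' },
//   { q: 'What is FastGPT?', a: 'A knowledge base QA platform' }
// ]
```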

@ -4,27 +4,36 @@
"dependencies": {
"@fastgpt/global": "workspace:*",
"@node-rs/jieba": "1.10.0",
"@xmldom/xmldom": "^0.8.10",
"axios": "^1.5.1",
"cheerio": "1.0.0-rc.12",
"cookie": "^0.5.0",
"date-fns": "2.30.0",
"dayjs": "^1.11.7",
"decompress": "^4.2.1",
"encoding": "^0.1.13",
"file-type": "^19.0.0",
"json5": "^2.2.3",
"jsonwebtoken": "^9.0.2",
"mammoth": "^1.6.0",
"mongoose": "^7.0.2",
"multer": "1.4.5-lts.1",
"next": "13.5.2",
"nextjs-cors": "^2.1.2",
"node-cron": "^3.0.3",
"node-xlsx": "^0.23.0",
"papaparse": "5.4.1",
"pdfjs-dist": "4.0.269",
"pg": "^8.10.0",
"tunnel": "^0.0.6"
},
"devDependencies": {
"@types/cookie": "^0.5.2",
"@types/decompress": "^4.2.7",
"@types/jsonwebtoken": "^9.0.3",
"@types/multer": "^1.4.10",
"@types/node-cron": "^3.0.11",
"@types/papaparse": "5.3.7",
"@types/pg": "^8.6.6",
"@types/tunnel": "^0.0.4"
}

@ -0,0 +1,42 @@
import { AuthResponseType } from '@fastgpt/global/support/permission/type';
import { AuthModeType } from '../type';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { parseHeaderCert } from '../controller';
import { getFileById } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
export async function authFile({
fileId,
per = 'owner',
...props
}: AuthModeType & {
fileId: string;
}): Promise<
AuthResponseType & {
file: DatasetFileSchema;
}
> {
const authRes = await parseHeaderCert(props);
const { teamId, tmbId } = authRes;
const file = await getFileById({ bucketName: BucketNameEnum.dataset, fileId });
if (!file) {
return Promise.reject(CommonErrEnum.fileNotFound);
}
if (file.metadata?.teamId !== teamId) {
return Promise.reject(CommonErrEnum.unAuthFile);
}
if (per === 'owner' && file.metadata?.tmbId !== tmbId) {
return Promise.reject(CommonErrEnum.unAuthFile);
}
return {
...authRes,
isOwner: per === 'owner',
canWrite: per === 'owner',
file
};
}

@ -1,40 +0,0 @@
import Papa from 'papaparse';
import { readFileRawText } from './rawText';
/**
* read csv to json
* @response {
* header: string[],
* data: string[][]
* }
*/
export const readCsvContent = async ({ file }: { file: File }) => {
try {
const { rawText: textArr } = await readFileRawText(file);
const csvArr = Papa.parse(textArr).data as string[][];
if (csvArr.length === 0) {
throw new Error('csv 解析失败');
}
const header = csvArr.shift() as string[];
// add title to data
const rawText = csvArr
.map((item) =>
item.map((value, index) => {
if (!header[index]) return value;
return `${header[index]}: ${value}`;
})
)
.flat()
.join('\n');
return {
rawText,
header,
data: csvArr.map((item) => item)
};
} catch (error) {
return Promise.reject('解析 csv 文件失败');
}
};

@ -1,21 +0,0 @@
import { htmlStr2Md } from '../../string/markdown';
import { readFileRawText } from './rawText';
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
export const readHtmlFile = async ({
file,
uploadImgController
}: {
file: File;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const { rawText } = await readFileRawText(file);
const md = htmlStr2Md(rawText);
const simpleMd = await markdownProcess({
rawText: md,
uploadImgController
});
return { rawText: simpleMd };
};

@ -1,49 +0,0 @@
import { loadFile2Buffer } from '../utils';
import { readCsvContent } from './csv';
import { readHtmlFile } from './html';
import { readMdFile } from './md';
import { readPdfFile } from './pdf';
import { readFileRawText } from './rawText';
import { readWordFile } from './word';
export const readFileRawContent = async ({
file,
uploadBase64Controller
}: {
file: File;
uploadBase64Controller?: (base64: string) => Promise<string>;
}): Promise<{
rawText: string;
}> => {
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
switch (extension) {
case 'txt':
return readFileRawText(file);
case 'md':
return readMdFile({
file,
uploadImgController: uploadBase64Controller
});
case 'html':
return readHtmlFile({
file,
uploadImgController: uploadBase64Controller
});
case 'csv':
return readCsvContent({ file });
case 'pdf':
const pdf = await loadFile2Buffer({ file });
return readPdfFile({ pdf });
case 'docx':
return readWordFile({
file,
uploadImgController: uploadBase64Controller
});
default:
return {
rawText: ''
};
}
};

@ -1,17 +0,0 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { readFileRawText } from './rawText';
export const readMdFile = async ({
file,
uploadImgController
}: {
file: File;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const { rawText: md } = await readFileRawText(file);
const simpleMd = await markdownProcess({
rawText: md,
uploadImgController
});
return { rawText: simpleMd };
};

@ -1,36 +0,0 @@
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
/**
* read file raw text
*/
export const readFileRawText = (file: File) => {
return new Promise<{ rawText: string }>((resolve, reject) => {
try {
const reader = new FileReader();
reader.onload = () => {
//@ts-ignore
const encode = detectFileEncoding(reader.result);
// Read the file again, this time with the detected encoding
const reader2 = new FileReader();
reader2.onload = () => {
resolve({
rawText: reader2.result as string
});
};
reader2.onerror = (err) => {
console.log('Error reading file with detected encoding:', err);
reject('Read file error with detected encoding');
};
reader2.readAsText(file, encode);
};
reader.onerror = (err) => {
console.log('error txt read:', err);
reject('Read file error');
};
reader.readAsBinaryString(file);
} catch (error) {
reject(error);
}
});
};

@ -1,28 +0,0 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { htmlStr2Md } from '../../string/markdown';
import { loadFile2Buffer } from '../utils';
import mammoth from 'mammoth';
export const readWordFile = async ({
file,
uploadImgController
}: {
file: File;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const buffer = await loadFile2Buffer({ file });
const { value: html } = await mammoth.convertToHtml({
arrayBuffer: buffer
});
const md = htmlStr2Md(html);
const rawText = await markdownProcess({
rawText: md,
uploadImgController: uploadImgController
});
return {
rawText
};
};

@ -101,6 +101,7 @@ export const iconPaths = {
'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'),
'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'),
'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'),
'core/dataset/splitLight': () => import('./icons/core/dataset/splitLight.svg'),
'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'),
'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'),
'core/modules/basicNode': () => import('./icons/core/modules/basicNode.svg'),

@ -0,0 +1,6 @@
<svg t="1711938287623" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"
p-id="5143">
<path
d="M153.6 153.6h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 1 1 0-102.4z m0 614.4h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m0-307.2h131.6352a51.2 51.2 0 1 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m292.5568 0h131.6864a51.2 51.2 0 0 1 0 102.4H446.1568a51.2 51.2 0 0 1 0-102.4z m292.608 0H870.4a51.2 51.2 0 0 1 0 102.4h-131.6352a51.2 51.2 0 0 1 0-102.4z"
p-id="5144"></path>
</svg>

@ -0,0 +1,70 @@
import React from 'react';
import MyIcon from '../Icon';
import {
Drawer,
DrawerBody,
DrawerHeader,
DrawerOverlay,
DrawerContent,
DrawerCloseButton,
DrawerContentProps,
Flex,
Image
} from '@chakra-ui/react';
import { useLoading } from '../../../hooks/useLoading';
type Props = DrawerContentProps & {
onClose: () => void;
iconSrc?: string;
title?: any;
isLoading?: boolean;
};
const MyRightDrawer = ({
onClose,
iconSrc,
title,
maxW = ['90vw', '30vw'],
children,
isLoading,
...props
}: Props) => {
const { Loading } = useLoading();
return (
<Drawer isOpen placement="right" onClose={onClose}>
<DrawerOverlay />
<DrawerContent
maxW={maxW}
{...props}
h={'94%'}
mt={'2%'}
borderLeftRadius={'lg'}
overflow={'hidden'}
>
<DrawerCloseButton />
<DrawerHeader>
<Flex alignItems={'center'} pr={2}>
{iconSrc && (
<>
{iconSrc.startsWith('/') ? (
<Image mr={3} objectFit={'contain'} alt="" src={iconSrc} w={'20px'} />
) : (
<MyIcon mr={3} name={iconSrc as any} w={'20px'} />
)}
</>
)}
{title}
</Flex>
<DrawerCloseButton zIndex={1} />
</DrawerHeader>
<DrawerBody>
{children}
<Loading loading={isLoading} fixed={false} />
</DrawerBody>
</DrawerContent>
</Drawer>
);
};
export default MyRightDrawer;

@ -2,6 +2,8 @@ import React from 'react';
import { Box, Flex, useTheme, Grid, type GridProps } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import MyTooltip from '../MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import QuestionTip from '../MyTooltip/QuestionTip';
// @ts-ignore
interface Props extends GridProps {
@ -36,58 +38,59 @@ const LeftRadio = ({
return (
<Grid gridGap={[3, 5]} fontSize={['sm', 'md']} {...props}>
{list.map((item) => (
<MyTooltip key={item.value} label={item.tooltip}>
<Flex
alignItems={item.desc ? align : 'center'}
cursor={'pointer'}
userSelect={'none'}
px={px}
py={py}
border={theme.borders.sm}
borderWidth={'1px'}
borderRadius={'md'}
position={'relative'}
{...(value === item.value
? {
borderColor: 'primary.400',
bg: activeBg,
boxShadow: 'focus'
<Flex
alignItems={item.desc ? align : 'center'}
key={item.value}
cursor={'pointer'}
userSelect={'none'}
px={px}
py={py}
border={theme.borders.sm}
borderWidth={'1px'}
borderRadius={'md'}
position={'relative'}
{...(value === item.value
? {
borderColor: 'primary.400',
bg: activeBg,
boxShadow: 'focus'
}
: {
bg: defaultBg,
_hover: {
borderColor: 'primary.300'
}
: {
bg: defaultBg,
_hover: {
borderColor: 'primary.300'
}
})}
onClick={() => onChange(item.value)}
})}
onClick={() => onChange(item.value)}
>
<Box
w={'18px'}
h={'18px'}
borderWidth={'2.4px'}
borderColor={value === item.value ? 'primary.015' : 'transparent'}
borderRadius={'50%'}
mr={3}
>
<Box
w={'18px'}
h={'18px'}
borderWidth={'2.4px'}
borderColor={value === item.value ? 'primary.015' : 'transparent'}
<Flex
w={'100%'}
h={'100%'}
borderWidth={'1px'}
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
bg={value === item.value ? 'primary.1' : 'transparent'}
borderRadius={'50%'}
mr={3}
alignItems={'center'}
justifyContent={'center'}
>
<Flex
w={'100%'}
h={'100%'}
borderWidth={'1px'}
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
bg={value === item.value ? 'primary.1' : 'transparent'}
<Box
w={'5px'}
h={'5px'}
borderRadius={'50%'}
alignItems={'center'}
justifyContent={'center'}
>
<Box
w={'5px'}
h={'5px'}
borderRadius={'50%'}
bg={value === item.value ? 'primary.600' : 'transparent'}
></Box>
</Flex>
</Box>
<Box flex={'1 0 0'}>
bg={value === item.value ? 'primary.600' : 'transparent'}
></Box>
</Flex>
</Box>
<Box flex={'1 0 0'}>
<Flex alignItems={'center'}>
<Box
color={'myGray.900'}
fontWeight={item.desc ? '500' : 'normal'}
@ -95,15 +98,16 @@ const LeftRadio = ({
>
{typeof item.title === 'string' ? t(item.title) : item.title}
</Box>
{!!item.desc && (
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
{t(item.desc)}
</Box>
)}
{item?.children}
</Box>
</Flex>
</MyTooltip>
{!!item.tooltip && <QuestionTip label={item.tooltip} ml={1} color={'myGray.600'} />}
</Flex>
{!!item.desc && (
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
{t(item.desc)}
</Box>
)}
{item?.children}
</Box>
</Flex>
))}
</Grid>
);

@ -12,31 +12,31 @@
"@emotion/styled": "^11.11.0",
"@fastgpt/global": "workspace:*",
"@fingerprintjs/fingerprintjs": "^4.2.1",
"@lexical/react": "0.12.6",
"@lexical/text": "0.12.6",
"@lexical/utils": "0.12.6",
"@monaco-editor/react": "^4.6.0",
"mammoth": "^1.6.0",
"@tanstack/react-query": "^4.24.10",
"date-fns": "2.30.0",
"dayjs": "^1.11.7",
"i18next": "23.10.0",
"joplin-turndown-plugin-gfm": "^1.0.12",
"lexical": "0.12.6",
"lodash": "^4.17.21",
"mammoth": "^1.6.0",
"next-i18next": "15.2.0",
"papaparse": "^5.4.1",
"pdfjs-dist": "4.0.269",
"react": "18.2.0",
"react-day-picker": "^8.7.1",
"react-dom": "18.2.0",
"react-i18next": "13.5.0",
"turndown": "^7.1.2",
"lexical": "0.12.6",
"@lexical/react": "0.12.6",
"papaparse": "^5.4.1",
"@lexical/utils": "0.12.6",
"@lexical/text": "0.12.6",
"date-fns": "2.30.0",
"react-day-picker": "^8.7.1",
"lodash": "^4.17.21",
"@tanstack/react-query": "^4.24.10",
"dayjs": "^1.11.7"
"turndown": "^7.1.2"
},
"devDependencies": {
"@types/lodash": "^4.14.191",
"@types/react": "18.2.0",
"@types/papaparse": "^5.3.7",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.0",
"@types/turndown": "^5.0.4"
}

@ -99,6 +99,9 @@ importers:
'@node-rs/jieba':
specifier: 1.10.0
version: 1.10.0
'@xmldom/xmldom':
specifier: ^0.8.10
version: 0.8.10
axios:
specifier: ^1.5.1
version: 1.6.8
@ -114,15 +117,24 @@ importers:
dayjs:
specifier: ^1.11.7
version: 1.11.10
decompress:
specifier: ^4.2.1
version: 4.2.1
encoding:
specifier: ^0.1.13
version: 0.1.13
file-type:
specifier: ^19.0.0
version: 19.0.0
json5:
specifier: ^2.2.3
version: 2.2.3
jsonwebtoken:
specifier: ^9.0.2
version: 9.0.2
mammoth:
specifier: ^1.6.0
version: 1.7.0
mongoose:
specifier: ^7.0.2
version: 7.6.10
@ -138,6 +150,15 @@ importers:
node-cron:
specifier: ^3.0.3
version: 3.0.3
node-xlsx:
specifier: ^0.23.0
version: 0.23.0
papaparse:
specifier: 5.4.1
version: 5.4.1
pdfjs-dist:
specifier: 4.0.269
version: 4.0.269(encoding@0.1.13)
pg:
specifier: ^8.10.0
version: 8.11.3
@ -148,6 +169,9 @@ importers:
'@types/cookie':
specifier: ^0.5.2
version: 0.5.4
'@types/decompress':
specifier: ^4.2.7
version: 4.2.7
'@types/jsonwebtoken':
specifier: ^9.0.3
version: 9.0.6
@ -157,6 +181,9 @@ importers:
'@types/node-cron':
specifier: ^3.0.11
version: 3.0.11
'@types/papaparse':
specifier: 5.3.7
version: 5.3.7
'@types/pg':
specifier: ^8.6.6
version: 8.11.3
@ -240,7 +267,7 @@ importers:
version: 5.4.1
pdfjs-dist:
specifier: 4.0.269
version: 4.0.269
version: 4.0.269(encoding@0.1.13)
react:
specifier: 18.2.0
version: 18.2.0
@ -3789,10 +3816,9 @@ packages:
yjs: 13.6.14
dev: false
/@mapbox/node-pre-gyp@1.0.11:
/@mapbox/node-pre-gyp@1.0.11(encoding@0.1.13):
resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==}
hasBin: true
requiresBuild: true
dependencies:
detect-libc: 2.0.3
https-proxy-agent: 5.0.1
@ -4522,6 +4548,10 @@ packages:
use-sync-external-store: 1.2.0(react@18.2.0)
dev: false
/@tokenizer/token@0.3.0:
resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==}
dev: false
/@trysound/sax@0.2.0:
resolution: {integrity: sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==}
engines: {node: '>=10.13.0'}
@ -4737,6 +4767,12 @@ packages:
'@types/ms': 0.7.34
dev: false
/@types/decompress@4.2.7:
resolution: {integrity: sha512-9z+8yjKr5Wn73Pt17/ldnmQToaFHZxK0N1GHysuk/JIPT8RIdQeoInM01wWPgypRcvb6VH1drjuFpQ4zmY437g==}
dependencies:
'@types/node': 20.11.30
dev: true
/@types/estree@1.0.5:
resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==}
dev: true
@ -4876,6 +4912,12 @@ packages:
'@types/node': 20.11.30
dev: true
/@types/papaparse@5.3.7:
resolution: {integrity: sha512-f2HKmlnPdCvS0WI33WtCs5GD7X1cxzzS/aduaxSu3I7TbhWlENjSPs6z5TaB9K0J+BH1jbmqTaM+ja5puis4wg==}
dependencies:
'@types/node': 20.11.30
dev: true
/@types/parse-json@4.0.2:
resolution: {integrity: sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw==}
@ -5550,6 +5592,13 @@ packages:
resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==}
engines: {node: '>=8'}
/bl@1.2.3:
resolution: {integrity: sha512-pvcNpa0UU69UT341rO6AYy4FVAIkUHuZXRIWbq+zHnsVcRzDDjIAhGuuYoi0d//cwIwtt4pkpKycWEfjdV+vww==}
dependencies:
readable-stream: 2.3.8
safe-buffer: 5.2.1
dev: false
/bluebird@3.4.7:
resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==}
dev: false
@ -5610,10 +5659,29 @@ packages:
engines: {node: '>=14.20.1'}
dev: false
/buffer-alloc-unsafe@1.1.0:
resolution: {integrity: sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg==}
dev: false
/buffer-alloc@1.2.0:
resolution: {integrity: sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow==}
dependencies:
buffer-alloc-unsafe: 1.1.0
buffer-fill: 1.0.0
dev: false
/buffer-crc32@0.2.13:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
dev: false
/buffer-equal-constant-time@1.0.1:
resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
dev: false
/buffer-fill@1.0.0:
resolution: {integrity: sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ==}
dev: false
/buffer-from@1.1.2:
resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
dev: false
@ -5623,6 +5691,13 @@ packages:
engines: {node: '>=4'}
dev: false
/buffer@5.7.1:
resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==}
dependencies:
base64-js: 1.5.1
ieee754: 1.2.1
dev: false
/busboy@1.6.0:
resolution: {integrity: sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==}
engines: {node: '>=10.16.0'}
@ -5665,12 +5740,12 @@ packages:
/caniuse-lite@1.0.30001599:
resolution: {integrity: sha512-LRAQHZ4yT1+f9LemSMeqdMpMxZcc4RMWdj4tiFe3G8tNkWK+E58g+/tzotb5cU6TbcVJLr4fySiAW7XmxQvZQA==}
/canvas@2.11.2:
/canvas@2.11.2(encoding@0.1.13):
resolution: {integrity: sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==}
engines: {node: '>=6'}
requiresBuild: true
dependencies:
'@mapbox/node-pre-gyp': 1.0.11
'@mapbox/node-pre-gyp': 1.0.11(encoding@0.1.13)
nan: 2.19.0
simple-get: 3.1.1
transitivePeerDependencies:
@ -5909,6 +5984,10 @@ packages:
engines: {node: '>=16'}
dev: true
/commander@2.20.3:
resolution: {integrity: sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==}
dev: false
/commander@7.2.0:
resolution: {integrity: sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==}
engines: {node: '>= 10'}
@ -6469,6 +6548,59 @@ packages:
dev: false
optional: true
/decompress-tar@4.1.1:
resolution: {integrity: sha512-JdJMaCrGpB5fESVyxwpCx4Jdj2AagLmv3y58Qy4GE6HMVjWz1FeVQk1Ct4Kye7PftcdOo/7U7UKzYBJgqnGeUQ==}
engines: {node: '>=4'}
dependencies:
file-type: 5.2.0
is-stream: 1.1.0
tar-stream: 1.6.2
dev: false
/decompress-tarbz2@4.1.1:
resolution: {integrity: sha512-s88xLzf1r81ICXLAVQVzaN6ZmX4A6U4z2nMbOwobxkLoIIfjVMBg7TeguTUXkKeXni795B6y5rnvDw7rxhAq9A==}
engines: {node: '>=4'}
dependencies:
decompress-tar: 4.1.1
file-type: 6.2.0
is-stream: 1.1.0
seek-bzip: 1.0.6
unbzip2-stream: 1.4.3
dev: false
/decompress-targz@4.1.1:
resolution: {integrity: sha512-4z81Znfr6chWnRDNfFNqLwPvm4db3WuZkqV+UgXQzSngG3CEKdBkw5jrv3axjjL96glyiiKjsxJG3X6WBZwX3w==}
engines: {node: '>=4'}
dependencies:
decompress-tar: 4.1.1
file-type: 5.2.0
is-stream: 1.1.0
dev: false
/decompress-unzip@4.0.1:
resolution: {integrity: sha512-1fqeluvxgnn86MOh66u8FjbtJpAFv5wgCT9Iw8rcBqQcCo5tO8eiJw7NNTrvt9n4CRBVq7CstiS922oPgyGLrw==}
engines: {node: '>=4'}
dependencies:
file-type: 3.9.0
get-stream: 2.3.1
pify: 2.3.0
yauzl: 2.10.0
dev: false
/decompress@4.2.1:
resolution: {integrity: sha512-e48kc2IjU+2Zw8cTb6VZcJQ3lgVbS4uuB1TfCHbiZIP/haNXm+SVyhu+87jts5/3ROpd82GSVCoNs/z8l4ZOaQ==}
engines: {node: '>=4'}
dependencies:
decompress-tar: 4.1.1
decompress-tarbz2: 4.1.1
decompress-targz: 4.1.1
decompress-unzip: 4.0.1
graceful-fs: 4.2.11
make-dir: 1.3.0
pify: 2.3.0
strip-dirs: 2.1.0
dev: false
/deep-eql@4.1.3:
resolution: {integrity: sha512-WaEtAOpRA1MQ0eohqZjpGD8zdI0Ovsm8mmFhaDN8dvDZzyoUMcYDnf5Y6iu7HTXxf8JDS23qWa4a+hKCDyOPzw==}
engines: {node: '>=6'}
@ -6712,6 +6844,12 @@ packages:
iconv-lite: 0.6.3
dev: false
/end-of-stream@1.4.4:
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
dependencies:
once: 1.4.0
dev: false
/enhanced-resolve@5.16.0:
resolution: {integrity: sha512-O+QWCviPNSSLAD9Ucn8Awv+poAkqn3T1XY5/N7kR7rQO9yfSGWkYZDwpJ+iKF7B8rxaQKWngSqACpgzeapSyoA==}
engines: {node: '>=10.13.0'}
@ -7403,6 +7541,12 @@ packages:
dependencies:
format: 0.2.2
/fd-slicer@1.1.0:
resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==}
dependencies:
pend: 1.2.0
dev: false
/file-entry-cache@6.0.1:
resolution: {integrity: sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==}
engines: {node: ^10.12.0 || >=12.0.0}
@ -7410,6 +7554,30 @@ packages:
flat-cache: 3.2.0
dev: true
/file-type@19.0.0:
resolution: {integrity: sha512-s7cxa7/leUWLiXO78DVVfBVse+milos9FitauDLG1pI7lNaJ2+5lzPnr2N24ym+84HVwJL6hVuGfgVE+ALvU8Q==}
engines: {node: '>=18'}
dependencies:
readable-web-to-node-stream: 3.0.2
strtok3: 7.0.0
token-types: 5.0.1
dev: false
/file-type@3.9.0:
resolution: {integrity: sha512-RLoqTXE8/vPmMuTI88DAzhMYC99I8BWv7zYP4A1puo5HIjEJ5EX48ighy4ZyKMG9EDXxBgW6e++cn7d1xuFghA==}
engines: {node: '>=0.10.0'}
dev: false
/file-type@5.2.0:
resolution: {integrity: sha512-Iq1nJ6D2+yIO4c8HHg4fyVb8mAJieo1Oloy1mLLaB2PvezNedhBVm+QU7g0qM42aiMbRXTxKKwGD17rjKNJYVQ==}
engines: {node: '>=4'}
dev: false
/file-type@6.2.0:
resolution: {integrity: sha512-YPcTBDV+2Tm0VqjybVd32MHdlEGAtuxS3VAYsumFokDSMG+ROT5wawGlnHDoz7bfMcMDt9hxuXvXwoKUx2fkOg==}
engines: {node: '>=4'}
dev: false
/fill-range@7.0.1:
resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==}
engines: {node: '>=8'}
@ -7550,6 +7718,10 @@ packages:
engines: {node: '>= 0.6'}
dev: false
/fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
dev: false
/fs-minipass@2.1.0:
resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==}
engines: {node: '>= 8'}
@ -7626,6 +7798,14 @@ packages:
engines: {node: '>=6'}
dev: false
/get-stream@2.3.1:
resolution: {integrity: sha512-AUGhbbemXxrZJRD5cDvKtQxLuYaIbNtDTK8YqupCI393Q2KSTreEsLUN3ZxAWFGiKTzL6nKuzfcIvieflUX9qA==}
engines: {node: '>=0.10.0'}
dependencies:
object-assign: 4.1.1
pinkie-promise: 2.0.1
dev: false
/get-stream@6.0.1:
resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==}
engines: {node: '>=10'}
@ -7978,6 +8158,10 @@ packages:
safer-buffer: 2.1.2
dev: false
/ieee754@1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
dev: false
/ignore@5.3.1:
resolution: {integrity: sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==}
engines: {node: '>= 4'}
@ -8178,6 +8362,10 @@ packages:
engines: {node: '>= 0.4'}
dev: true
/is-natural-number@4.0.1:
resolution: {integrity: sha512-Y4LTamMe0DDQIIAlaer9eKebAlDSV6huy+TWhJVPlzZh2o4tRP5SQWFlLn5N0To4mDD22/qdOq+veo1cSISLgQ==}
dev: false
/is-negative-zero@2.0.3:
resolution: {integrity: sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==}
engines: {node: '>= 0.4'}
@ -8229,6 +8417,11 @@ packages:
call-bind: 1.0.7
dev: true
/is-stream@1.1.0:
resolution: {integrity: sha512-uQPm8kcs47jx38atAcWTVxyltQYoPT68y9aWYdV6yWXSyW8mzSat0TL6CiWdZeCdF3KrAvpVtnHbTv4RN+rqdQ==}
engines: {node: '>=0.10.0'}
dev: false
/is-stream@3.0.0:
resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==}
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
@ -8704,6 +8897,13 @@ packages:
'@jridgewell/sourcemap-codec': 1.4.15
dev: true
/make-dir@1.3.0:
resolution: {integrity: sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ==}
engines: {node: '>=4'}
dependencies:
pify: 3.0.0
dev: false
/make-dir@3.1.0:
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
engines: {node: '>=8'}
@ -9547,6 +9747,14 @@ packages:
/node-releases@2.0.14:
resolution: {integrity: sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==}
/node-xlsx@0.23.0:
resolution: {integrity: sha512-r3KaSZSsSrK92rbPXnX/vDdxURmPPik0rjJ3A+Pybzpjyrk4G6WyGfj8JIz5dMMEpCmWVpmO4qoVPBxnpLv/8Q==}
engines: {node: '>=10.0.0'}
hasBin: true
dependencies:
xlsx: '@cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz'
dev: false
/non-layered-tidy-tree-layout@2.0.2:
resolution: {integrity: sha512-gkXMxRzUH+PB0ax9dUN0yYF0S25BqeAYqhgMaLUFmpXLEk7Fcu8f4emJuOAY0V8kjDICxROIKsTAKsV/v355xw==}
dev: false
@ -9875,17 +10083,26 @@ packages:
resolution: {integrity: sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==}
dev: true
/pdfjs-dist@4.0.269:
/pdfjs-dist@4.0.269(encoding@0.1.13):
resolution: {integrity: sha512-jjWO56tcOjnmPqDf8PmXDeZ781AGvpHMYI3HhNtaFKTRXXPaD1ArSrhVe38/XsrIQJ0onISCND/vuXaWJkiDWw==}
engines: {node: '>=18'}
optionalDependencies:
canvas: 2.11.2
canvas: 2.11.2(encoding@0.1.13)
path2d-polyfill: 2.1.1
transitivePeerDependencies:
- encoding
- supports-color
dev: false
/peek-readable@5.0.0:
resolution: {integrity: sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==}
engines: {node: '>=14.16'}
dev: false
/pend@1.2.0:
resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==}
dev: false
/pg-cloudflare@1.1.1:
resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==}
requiresBuild: true
@ -9979,6 +10196,28 @@ packages:
hasBin: true
dev: true
/pify@2.3.0:
resolution: {integrity: sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==}
engines: {node: '>=0.10.0'}
dev: false
/pify@3.0.0:
resolution: {integrity: sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg==}
engines: {node: '>=4'}
dev: false
/pinkie-promise@2.0.1:
resolution: {integrity: sha512-0Gni6D4UcLTbv9c57DfxDGdr41XfgUjqWZu492f0cIGr16zDU06BWP/RAEvOuo7CQ0CNjHaLlM59YJJFm3NWlw==}
engines: {node: '>=0.10.0'}
dependencies:
pinkie: 2.0.4
dev: false
/pinkie@2.0.4:
resolution: {integrity: sha512-MnUuEycAemtSaeFSjXKW/aroV7akBbY+Sv+RkyqFjgAe73F+MR0TBWKBRDkmfWq/HiFmdavfZ1G7h4SPZXaCSg==}
engines: {node: '>=0.10.0'}
dev: false
/pkg-types@1.0.3:
resolution: {integrity: sha512-nN7pYi0AQqJnoLPC9eHFQ8AcyaixBUOwvqc5TDnIKCMEE6I0y8P7OKA7fPexsXGCGxQDl/cmrLAp26LhcwxZ4A==}
dependencies:
@ -10396,7 +10635,13 @@ packages:
string_decoder: 1.3.0
util-deprecate: 1.0.2
dev: false
optional: true
/readable-web-to-node-stream@3.0.2:
resolution: {integrity: sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==}
engines: {node: '>=8'}
dependencies:
readable-stream: 3.6.2
dev: false
/readdirp@3.6.0:
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
@ -10715,6 +10960,13 @@ packages:
dependencies:
loose-envify: 1.4.0
/seek-bzip@1.0.6:
resolution: {integrity: sha512-e1QtP3YL5tWww8uKaOCQ18UxIT2laNBXHjV/S2WYCiK4udiv8lkG89KRIoCjUagnAmCBurjF4zEVX2ByBbnCjQ==}
hasBin: true
dependencies:
commander: 2.20.3
dev: false
/semver@6.3.1:
resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==}
hasBin: true
@ -11024,7 +11276,6 @@ packages:
dependencies:
safe-buffer: 5.2.1
dev: false
optional: true
/strip-ansi@6.0.1:
resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
@ -11044,6 +11295,12 @@ packages:
engines: {node: '>=4'}
dev: true
/strip-dirs@2.1.0:
resolution: {integrity: sha512-JOCxOeKLm2CAS73y/U4ZeZPTkE+gNVCzKt7Eox84Iej1LT/2pTWYpZKJuxwQpvX1LiZb1xokNR7RLfuBAa7T3g==}
dependencies:
is-natural-number: 4.0.1
dev: false
/strip-final-newline@3.0.0:
resolution: {integrity: sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==}
engines: {node: '>=12'}
@ -11060,6 +11317,14 @@ packages:
js-tokens: 8.0.3
dev: true
/strtok3@7.0.0:
resolution: {integrity: sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==}
engines: {node: '>=14.16'}
dependencies:
'@tokenizer/token': 0.3.0
peek-readable: 5.0.0
dev: false
/style-to-object@0.4.4:
resolution: {integrity: sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==}
dependencies:
@ -11131,6 +11396,19 @@ packages:
engines: {node: '>=6'}
dev: true
/tar-stream@1.6.2:
resolution: {integrity: sha512-rzS0heiNf8Xn7/mpdSVVSMAWAoy9bfb1WOTYC78Z0UQKeKa/CWS8FOq0lKGNa8DWKAn9gxjCvMLYc5PGXYlK2A==}
engines: {node: '>= 0.8.0'}
dependencies:
bl: 1.2.3
buffer-alloc: 1.2.0
end-of-stream: 1.4.4
fs-constants: 1.0.0
readable-stream: 2.3.8
to-buffer: 1.1.1
xtend: 4.0.2
dev: false
/tar@6.2.0:
resolution: {integrity: sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==}
engines: {node: '>=10'}
@ -11149,6 +11427,10 @@ packages:
resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==}
dev: true
/through@2.3.8:
resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
dev: false
/timezones-list@3.0.3:
resolution: {integrity: sha512-C+Vdvvj2c1xB6pu81pOX8geo6mrk/QsudFVlTVQET7QQwu8WAIyhDNeCrK5grU7EMzmbKLWqz7uU6dN8fvQvPQ==}
dev: false
@ -11171,6 +11453,10 @@ packages:
engines: {node: '>=14.0.0'}
dev: true
/to-buffer@1.1.1:
resolution: {integrity: sha512-lx9B5iv7msuFYE3dytT+KE5tap+rNYw+K4jVkb9R/asAb+pbBSM17jtunHplhBe6RRJdZx3Pn2Jph24O32mOVg==}
dev: false
/to-fast-properties@2.0.0:
resolution: {integrity: sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==}
engines: {node: '>=4'}
@ -11190,6 +11476,14 @@ packages:
engines: {node: '>=0.6'}
dev: false
/token-types@5.0.1:
resolution: {integrity: sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==}
engines: {node: '>=14.16'}
dependencies:
'@tokenizer/token': 0.3.0
ieee754: 1.2.1
dev: false
/tr46@0.0.3:
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
dev: false
@ -11369,6 +11663,13 @@ packages:
which-boxed-primitive: 1.0.2
dev: true
/unbzip2-stream@1.4.3:
resolution: {integrity: sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==}
dependencies:
buffer: 5.7.1
through: 2.3.8
dev: false
/underscore@1.13.6:
resolution: {integrity: sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==}
dev: false
@ -11943,6 +12244,13 @@ packages:
engines: {node: '>= 14'}
dev: true
/yauzl@2.10.0:
resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==}
dependencies:
buffer-crc32: 0.2.13
fd-slicer: 1.1.0
dev: false
/yjs@13.6.14:
resolution: {integrity: sha512-D+7KcUr0j+vBCUSKXXEWfA+bG4UQBviAwP3gYBhkstkgwy5+8diOPMx0iqLIOxNo/HxaREUimZRxqHGAHCL2BQ==}
engines: {node: '>=16.0.0', npm: '>=8.0.0'}
@ -12029,3 +12337,11 @@ packages:
/zwitch@2.0.4:
resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==}
dev: false
'@cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz':
resolution: {tarball: https://cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz}
name: xlsx
version: 0.19.3
engines: {node: '>=0.8'}
hasBin: true
dev: false
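This new lockfile entry pins the SheetJS `xlsx` package (0.19.3, fetched from the SheetJS CDN) that backs the new xlsx loader. As a rough illustration of how such a dependency is typically used — the helper name below is hypothetical and the actual loader code is not part of this hunk — a workbook can be flattened into CSV text like this:

```ts
import * as xlsx from 'xlsx';

// Hypothetical helper: convert every sheet of an .xlsx buffer into CSV text.
export function xlsxBuffer2Csv(buffer: Buffer): string {
  const workbook = xlsx.read(buffer, { type: 'buffer' });
  return workbook.SheetNames.map((sheetName) =>
    xlsx.utils.sheet_to_csv(workbook.Sheets[sheetName])
  ).join('\n');
}
```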

View File

@ -82,7 +82,7 @@
"name": "Embedding-2",
"avatar": "/imgs/model/openai.svg",
"charsPointsPrice": 0,
"defaultToken": 700,
"defaultToken": 512,
"maxToken": 3000,
"weight": 100,
"dbConfig": {},

View File

@ -56,6 +56,7 @@
}
},
"common": {
"Action": "Action",
"Add": "Add",
"Add New": "Add",
"All": "All",
@ -79,6 +80,7 @@
"Create New": "Create",
"Create Success": "Create Success",
"Create Time": "Create time",
"Creating": "Creating",
"Custom Title": "Custom Title",
"Delete": "Delete",
"Delete Failed": "Delete Failed",
@ -191,6 +193,7 @@
"Empty file tip": "The file content is empty. The file may be unreadable or pure image file content.",
"File Content": "File Content",
"File Name": "File Name",
"File Size": "File Size",
"File content can not be empty": "File content can not be empty",
"Filename Can not Be Empty": "Filename Can not Be Empty",
"Read File Error": "Read file error",
@ -198,6 +201,7 @@
"Select failed": "Select file failed",
"Select file amount limit": "A maximum of {{max}} files can be selected",
"Select file amount limit 100": "You can select a maximum of 100 files at a time",
"Some file count exceeds limit": "The number of files exceeding {{maxCount}} has been automatically intercepted",
"Some file size exceeds limit": "Some files exceed: {{maxSize}}, have been filtered",
"Support file type": "Support {{fileType}} files",
"Support max count": "A maximum of {{maxCount}} files are supported.",
@ -620,7 +624,7 @@
"file": "File",
"folder": "Folder",
"import": {
"Auto mode Estimated Price Tips": "Enhanced processing calls the file processing model: {{price}} integral /1k Tokens",
"Auto mode Estimated Price Tips": "Need to call the file processing model, need to consume more Tokens: {{price}} credits /1k Tokens",
"Auto process": "Auto",
"Auto process desc": "Automatically set segmentation and preprocessing rules",
"CSV Import": "CSV QA Import",
@ -642,7 +646,7 @@
"Data file progress": "Data upload progress",
"Data process params": "Data process params",
"Down load csv template": "Down load csv template",
"Embedding Estimated Price Tips": "Index billing: {{price}}/1k Tokens",
"Embedding Estimated Price Tips": "Use only the index model and consume a small amount of Tokens: {{price}} credits /1k Tokens",
"Estimated Price": "Estimated Price: : {{amount}}{{unit}}",
"Estimated Price Tips": "QA charges: {{charsPointsPrice}} points/1k Tokens",
"Estimated points": "About {{points}} points",
@ -657,15 +661,19 @@
"Import Failed": "Import Failed",
"Import Success Tip": "The {{num}} group data is imported successfully. Please wait for training.",
"Import Tip": "This task cannot be terminated and takes some time to generate indexes. Please confirm the import. If the balance is insufficient, the unfinished task will be suspended and can continue after topping up.",
"Import success": "Import successful, please wait for training",
"Link name": "Link name",
"Link name placeholder": "Only static links are supported\nOne per line, up to 10 links at a time",
"Local file": "Local file",
"Local file desc": "Upload files in PDF, TXT, DOCX and other formats",
"Only Show First 50 Chunk": "Show only part",
"Preview chunks": "Chunks",
"Preview raw text": "Preview file text (max show 10000 words)",
"Predicted chunk": "Predicted chunk",
"Predicted chunk amount": "Predicted chunks:{{amount}}",
"Predicted total chars": "Predicted chars: {{total}}",
"Preview chunks": "Preview chunks",
"Preview raw text": "Preview file text (max show 3000 words)",
"Process way": "Process way",
"QA Estimated Price Tips": "QA billing: {{price}}/1k Tokens (including input and output)",
"QA Estimated Price Tips": "Need to call the file processing model, need to consume more Tokens: {{price}} credits /1k Tokens",
"QA Import": "QA Split",
"QA Import Tip": "According to certain rules, the text is broken into a larger paragraph, and the AI is invoked to generate a question and answer pair for the paragraph.",
"Re Preview": "RePreview",
@ -680,8 +688,8 @@
"Total tokens": "Tokens",
"Training mode": "Training mode",
"Upload data": "Upload data",
"Upload file progress": "File upload progress",
"Upload status": "Upload status",
"Upload file progress": "Upload state",
"Upload status": "Status",
"Upload success": "Upload success",
"Web link": "Web link",
"Web link desc": "Fetch static web content as a collection"
@ -1348,6 +1356,7 @@
"Pay error": "Pay error",
"Pay success": "Pay success",
"Plan expired time": "Plan expired time",
"Plan reset time": "Plan reset time",
"Standard Plan Detail": "Standard Plan Detail",
"To read plan": "Read plan",
"bill": {

View File

@ -56,6 +56,7 @@
}
},
"common": {
"Action": "操作",
"Add": "添加",
"Add New": "新增",
"All": "全部",
@ -79,6 +80,7 @@
"Create New": "新建",
"Create Success": "创建成功",
"Create Time": "创建时间",
"Creating": "创建中",
"Custom Title": "自定义标题",
"Delete": "删除",
"Delete Failed": "删除失败",
@ -191,6 +193,7 @@
"Empty file tip": "文件内容为空,可能该文件无法读取或为纯图片文件内容。",
"File Content": "文件内容",
"File Name": "文件名",
"File Size": "文件大小",
"File content can not be empty": "文件内容不能为空",
"Filename Can not Be Empty": "文件名不能为空",
"Read File Error": "解析文件失败",
@ -198,6 +201,7 @@
"Select failed": "选择文件异常",
"Select file amount limit": "最多选择 {{max}} 个文件",
"Select file amount limit 100": "每次最多选择100个文件",
"Some file count exceeds limit": "超出{{maxCount}}个文件,已自动截取",
"Some file size exceeds limit": "部分文件超出: {{maxSize}},已被过滤",
"Support file type": "支持 {{fileType}} 类型文件",
"Support max count": "最多支持 {{maxCount}} 个文件。",
@ -622,7 +626,7 @@
"file": "文件",
"folder": "目录",
"import": {
"Auto mode Estimated Price Tips": "增强处理需调用文件处理模型: {{price}}积分/1k Tokens",
"Auto mode Estimated Price Tips": "需调用文件处理模型需要消耗较多Tokens: {{price}}积分/1k Tokens",
"Auto process": "自动",
"Auto process desc": "自动设置分割和预处理规则",
"CSV Import": "CSV 导入",
@ -644,7 +648,7 @@
"Data file progress": "数据上传进度",
"Data process params": "数据处理参数",
"Down load csv template": "点击下载 CSV 模板",
"Embedding Estimated Price Tips": "索引计费: {{price}}积分/1k Tokens",
"Embedding Estimated Price Tips": "仅使用索引模型消耗少量Tokens: {{price}}积分/1k Tokens",
"Estimated Price": "预估价格: {{amount}}{{unit}}",
"Estimated Price Tips": "QA计费为\n输入: {{charsPointsPrice}}积分/1k Tokens",
"Estimated points": "预估消耗 {{points}} 积分",
@ -659,15 +663,19 @@
"Import Failed": "导入文件失败",
"Import Success Tip": "共成功导入 {{num}} 组数据,请耐心等待训练.",
"Import Tip": "该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。",
"Import success": "导入成功,请等待训练",
"Link name": "网络链接",
"Link name placeholder": "仅支持静态链接,如果上传后数据为空,可能该链接无法被读取\n每行一个每次最多 10 个链接",
"Local file": "本地文件",
"Local file desc": "上传 PDF, TXT, DOCX 等格式的文件",
"Only Show First 50 Chunk": "仅展示部分",
"Preview chunks": "分段预览",
"Preview raw text": "预览源文本最多展示10000字",
"Predicted chunk": "预估分段",
"Predicted chunk amount": "预估分段:{{amount}}",
"Predicted total chars": "预估字数: {{total}}",
"Preview chunks": "预览分段最多5段",
"Preview raw text": "预览源文本最多3000字",
"Process way": "处理方式",
"QA Estimated Price Tips": "QA计费为: {{price}}积分/1k Tokens(包含输入和输出)",
"QA Estimated Price Tips": "需调用文件处理模型需要消耗较多Tokens: {{price}}积分/1k Tokens",
"QA Import": "QA拆分",
"QA Import Tip": "根据一定规则,将文本拆成一段较大的段落,调用 AI 为该段落生成问答对。有非常高的检索精度,但是会丢失很多内容细节。",
"Re Preview": "重新生成预览",
@ -683,7 +691,7 @@
"Training mode": "训练模式",
"Upload data": "上传数据",
"Upload file progress": "文件上传进度",
"Upload status": "上传状态",
"Upload status": "状态",
"Upload success": "上传成功",
"Web link": "网页链接",
"Web link desc": "读取静态网页内容作为数据集"
@ -1350,6 +1358,7 @@
"Pay error": "支付失败",
"Pay success": "支付成功",
"Plan expired time": "套餐到期时间",
"Plan reset time": "套餐重置时间",
"Standard Plan Detail": "套餐详情",
"To read plan": "查看套餐",
"bill": {
@ -1407,7 +1416,7 @@
"Standard update fail": "修改订阅套餐异常",
"Standard update success": "变更订阅套餐成功!",
"Sub plan": "订阅套餐",
"Sub plan tip": "免费使用 FastGPT 或升级更高的套餐",
"Sub plan tip": "免费使用 {{title}} 或升级更高的套餐",
"Team plan and usage": "套餐与用量",
"Training weight": "训练优先级: {{weight}}",
"Update extra ai points": "额外AI积分",

View File

@ -2,6 +2,7 @@ import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import {
DatasetSearchModeEnum,
DatasetTypeEnum,
ImportDataSourceEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import {
@ -67,3 +68,24 @@ export type SearchTestResponse = {
similarity: number;
usingQueryExtension: boolean;
};
/* =========== training =========== */
export type PostPreviewFilesChunksProps = {
type: `${ImportDataSourceEnum}`;
sourceId: string;
chunkSize: number;
overlapRatio: number;
customSplitChar?: string;
};
export type PostPreviewFilesChunksResponse = {
fileId: string;
rawTextLength: number;
chunks: string[];
}[];
export type PostPreviewTableChunksResponse = {
fileId: string;
totalChunks: number;
chunks: { q: string; a: string; chunkIndex: number }[];
errorText?: string;
}[];

View File

@ -1,5 +0,0 @@
/* ================= dataset ===================== */
/* ================= collection ===================== */
/* ================= data ===================== */

View File

@ -397,14 +397,22 @@ const PlanUsage = () => {
<Box fontWeight={'bold'} fontSize="xl">
{t(planName)}
</Box>
<Flex mt="2" color={'#485264'} fontSize="sm">
<Box>{t('support.wallet.Plan expired time')}:</Box>
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
</Flex>
{isFreeTeam && (
<Box mt="2" color={'#485264'} fontSize="sm">
30使
</Box>
{isFreeTeam ? (
<>
<Flex mt="2" color={'#485264'} fontSize="sm">
<Box>{t('support.wallet.Plan reset time')}:</Box>
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
</Flex>
<Box mt="2" color={'#485264'} fontSize="sm">
30使
</Box>
</>
) : (
<Flex mt="2" color={'#485264'} fontSize="sm">
<Box>{t('support.wallet.Plan expired time')}:</Box>
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
</Flex>
)}
</Box>
<Button onClick={() => router.push('/price')}>

View File

@ -2,51 +2,15 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { addLog } from '@fastgpt/service/common/system/log';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
/*
1.
*/
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
let deleteImageAmount = 0;
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
const {
startHour = 72,
endHour = 24,
limit = 10
} = req.body as { startHour?: number; endHour?: number; limit?: number };
await authCert({ req, authRoot: true });
await connectToDatabase();
// start: now - maxDay, end: now - 3 day
const start = addHours(new Date(), -startHour);
const end = addHours(new Date(), -endHour);
deleteImageAmount = 0;
await checkInvalid(start, end, limit);
jsonRes(res, {
data: deleteImageAmount
});
} catch (error) {
addLog.error(`check Invalid user error`, error);
jsonRes(res, {
code: 500,
error
});
}
}
export async function checkInvalid(start: Date, end: Date, limit = 50) {
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
const images = await MongoImage.find(
{
createTime: {
@ -86,3 +50,37 @@ export async function checkInvalid(start: Date, end: Date, limit = 50) {
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
}
/* Move data from pg into mongo dataset.datas and build the mapping */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
await authCert({ req, authRoot: true });
(async () => {
try {
console.log('执行脏数据清理任务');
const end = addHours(new Date(), -1);
const start = addHours(new Date(), -360 * 24);
await checkFiles(start, end);
await checkInvalidImg(start, end);
await checkInvalidCollection(start, end);
await checkInvalidVector(start, end);
console.log('执行脏数据清理任务完毕');
} catch (error) {
console.log('执行脏数据清理任务出错了');
}
})();
jsonRes(res, {
message: 'success'
});
} catch (error) {
console.log(error);
jsonRes(res, {
code: 500,
error
});
}
}

View File

@ -6,9 +6,52 @@ import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
let deleteImageAmount = 0;
export async function checkInvalidImg(start: Date, end: Date, limit = 50) {
const images = await MongoImage.find(
{
createTime: {
$gte: start,
$lte: end
},
'metadata.relatedId': { $exists: true }
},
'_id teamId metadata'
);
console.log('total images', images.length);
let index = 0;
for await (const image of images) {
try {
// 1. Check whether a corresponding collection exists
const collection = await MongoDatasetCollection.findOne(
{
teamId: image.teamId,
'metadata.relatedImgId': image.metadata?.relatedId
},
'_id'
);
if (!collection) {
await image.deleteOne();
deleteImageAmount++;
}
index++;
index % 100 === 0 && console.log(index);
} catch (error) {
console.log(error);
}
}
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
}
/* Move data from pg into mongo dataset.datas and build the mapping */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {

View File

@ -2,13 +2,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
import { PluginTypeEnum } from '@fastgpt/global/core/plugin/constants';

View File

@ -0,0 +1,41 @@
/*
Read db file content and response 3000 words
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { fileId, csvFormat } = req.body as { fileId: string; csvFormat?: boolean };
if (!fileId) {
throw new Error('fileId is empty');
}
const { teamId } = await authFile({ req, authToken: true, fileId });
const { rawText } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat
});
jsonRes(res, {
data: {
previewContent: rawText.slice(0, 3000),
totalLength: rawText.length
}
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}
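A minimal sketch of how the web app might call this preview endpoint; the route path and the generic `POST` request helper are assumptions, since only the handler appears in this hunk:

```ts
// Sketch only: the route path and request helper are assumed, not part of this diff.
import { POST } from '@/web/common/api/request';

export const getPreviewFileContent = (data: { fileId: string; csvFormat?: boolean }) =>
  POST<{ previewContent: string; totalLength: number }>('/common/file/previewContent', data);

// Usage: preview the first 3000 characters of an uploaded dataset file.
// const { previewContent, totalLength } = await getPreviewFileContent({ fileId, csvFormat: true });
```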

View File

@ -2,9 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authFileToken } from '@fastgpt/service/support/permission/controller';
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import {
getDownloadStream,
getFileById,
readFileEncode
} from '@fastgpt/service/common/file/gridfs/controller';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@ -18,8 +21,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
throw new Error('fileId is empty');
}
const [file, encodeStream] = await Promise.all([
const [file, encoding, fileStream] = await Promise.all([
getFileById({ bucketName, fileId }),
readFileEncode({ bucketName, fileId }),
getDownloadStream({ bucketName, fileId })
]);
@ -27,24 +31,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return Promise.reject(CommonErrEnum.fileNotFound);
}
// get encoding
let buffers: Buffer = Buffer.from([]);
for await (const chunk of encodeStream) {
buffers = Buffer.concat([buffers, chunk]);
if (buffers.length > 10) {
encodeStream.abort();
break;
}
}
const encoding = detectFileEncoding(buffers);
res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`);
res.setHeader('Cache-Control', 'public, max-age=3600');
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
const fileStream = await getDownloadStream({ bucketName, fileId });
fileStream.pipe(res);
fileStream.on('error', () => {

View File

@ -4,24 +4,22 @@ import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
/**
* Creates the multer uploader
*/
const upload = getUploadModel({
maxSize: 500 * 1024 * 1024
});
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let filePaths: string[] = [];
/* Creates the multer uploader */
const upload = getUploadModel({
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
});
const filePaths: string[] = [];
try {
const { teamId, tmbId } = await authCert({ req, authToken: true });
await connectToDatabase();
const { file, bucketName, metadata } = await upload.doUpload(req, res);
filePaths = [file.path];
await connectToDatabase();
filePaths.push(file.path);
const { teamId, tmbId } = await authCert({ req, authToken: true });
if (!bucketName) {
throw new Error('bucketName is empty');
@ -46,6 +44,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
error
});
}
removeFilesByPaths(filePaths);
}
export const config = {

View File

@ -12,12 +12,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const { teamId } = await authChatCert({ req, authToken: true });
const data = await uploadMongoImg({
const imgId = await uploadMongoImg({
teamId,
...body
});
jsonRes(res, { data });
jsonRes(res, { data: imgId });
} catch (error) {
jsonRes(res, {
code: 500,

View File

@ -0,0 +1,112 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { datasetId, parentId, fileId } = req.body as FileIdCreateDatasetCollectionParams;
const trainingType = TrainingModeEnum.chunk;
try {
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: datasetId
});
// 1. read file
const { rawText, filename } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const { chunks = [] } = parseCsvTable2Chunks(rawText);
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
teamId,
tmbId,
name: filename,
parentId,
datasetId,
type: DatasetCollectionTypeEnum.file,
fileId,
// special metadata
trainingType,
chunkSize: 0,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
billId,
data: chunks.map((chunk, index) => ({
q: chunk.q,
a: chunk.a,
chunkIndex: index
})),
session
});
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}
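For orientation, a hedged sketch of how a client might call this endpoint; the handler only reads `datasetId`, `parentId` and `fileId` from the body and always trains in `chunk` mode. The helper name matches `postCreateDatasetCsvTableCollection` used in the upload step later in this commit, but the exact required fields come from `FileIdCreateDatasetCollectionParams` and may include more than shown:

```ts
// Illustrative client call; field set is a sketch, not the full FileIdCreateDatasetCollectionParams.
import { postCreateDatasetCsvTableCollection } from '@/web/core/dataset/api';

// Create a CSV-table collection from an already-uploaded dataset file (GridFS fileId).
async function createCsvTableCollection(datasetId: string, fileId: string, parentId = '') {
  // The handler reads the file, splits it with parseCsvTable2Chunks
  // and pushes the resulting q/a chunks to the training queue.
  return postCreateDatasetCsvTableCollection({ datasetId, parentId, fileId });
}
```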

View File

@ -1,94 +1,151 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delFileByFileIdList, uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import {
delFileByFileIdList,
readFileContent
} from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
/**
* Creates the multer uploader
*/
const upload = getUploadModel({
maxSize: 500 * 1024 * 1024
});
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let filePaths: string[] = [];
let fileId: string = '';
const { datasetId } = req.query as { datasetId: string };
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as FileIdCreateDatasetCollectionParams;
try {
await connectToDatabase();
const { teamId, tmbId } = await authDataset({
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId
datasetId: body.datasetId
});
const { file, bucketName, data } = await upload.doUpload<FileCreateDatasetCollectionParams>(
req,
res
);
filePaths = [file.path];
if (!file || !bucketName) {
throw new Error('file is empty');
}
const { fileMetadata, collectionMetadata, ...collectionData } = data;
// upload file and create collection
fileId = await uploadFile({
// 1. read file
const { rawText, filename } = await readFileContent({
teamId,
tmbId,
bucketName,
path: file.path,
filename: file.originalname,
contentType: file.mimetype,
metadata: fileMetadata
});
// create collection
const { _id: collectionId } = await createOneCollection({
...collectionData,
metadata: collectionMetadata,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
bucketName: BucketNameEnum.dataset,
fileId
});
jsonRes(res, {
data: collectionId
// 2. split chunks
const { chunks } = splitText2Chunks({
text: rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
if (fileId) {
try {
await delFileByFileIdList({
fileIdList: [fileId],
bucketName: BucketNameEnum.dataset
});
} catch (error) {}
}
jsonRes(res, {
code: 500,
error
});
}
removeFilesByPaths(filePaths);
}
export const config = {
api: {
bodyParser: false
}
};

View File

@ -19,6 +19,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@ -55,9 +56,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
insertLen: predictDataLimitLength(trainingType, chunks)
});
// 3. create collection and training bill
const [{ _id: collectionId }, { billId }] = await Promise.all([
createOneCollection({
const createResult = await mongoSessionRun(async (session) => {
// 3. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
@ -70,34 +71,44 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
qaPrompt,
hashRawText: hashStr(text),
rawTextLength: text.length
}),
createTrainingUsage({
rawTextLength: text.length,
session
});
// 4. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: name,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name
})
]);
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 4. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
collectionId,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
}))
// 5. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
})),
session
});
return { collectionId, results: insertResults };
});
jsonRes(res, {
data: { collectionId, results: insertResults }
data: createResult
});
} catch (err) {
jsonRes(res, {

View File

@ -15,7 +15,8 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { collectionId, data } = req.body as PushDatasetDataProps;
const body = req.body as PushDatasetDataProps;
const { collectionId, data } = body;
if (!collectionId || !Array.isArray(data)) {
throw new Error('collectionId or data is empty');
@ -42,9 +43,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
jsonRes<PushDatasetDataResponse>(res, {
data: await pushDataListToTrainingQueue({
...req.body,
...body,
teamId,
tmbId
tmbId,
datasetId: collection.datasetId._id,
agentModel: collection.datasetId.agentModel,
vectorModel: collection.datasetId.vectorModel
})
});
} catch (err) {

View File

@ -0,0 +1,80 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { type, sourceId, chunkSize, customSplitChar, overlapRatio } =
req.body as PostPreviewFilesChunksProps;
if (!sourceId) {
throw new Error('sourceId is empty');
}
if (chunkSize > 30000) {
throw new Error('chunkSize is too large, should be less than 30000');
}
const { chunks } = await (async () => {
if (type === ImportDataSourceEnum.fileLocal) {
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
const fileId = String(file._id);
const { rawText } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat: true
});
// split chunks (5 chunk)
const sliceRawText = 10 * chunkSize;
const { chunks } = splitText2Chunks({
text: rawText.slice(0, sliceRawText),
chunkLen: chunkSize,
overlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return {
chunks: chunks.map((item) => ({
q: item,
a: ''
}))
};
}
if (type === ImportDataSourceEnum.csvTable) {
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
const fileId = String(file._id);
const { rawText } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat: false
});
const { chunks } = parseCsvTable2Chunks(rawText);
return {
chunks: chunks || []
};
}
return { chunks: [] };
})();
jsonRes<{ q: string; a: string }[]>(res, {
data: chunks.slice(0, 5)
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}
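A hedged sketch of the request body this handler expects, built from the `PostPreviewFilesChunksProps` type added earlier in this commit; the route path and the `POST` helper are assumptions:

```ts
// Sketch only: body shape follows PostPreviewFilesChunksProps; the route path is assumed.
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { POST } from '@/web/common/api/request';
import type { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';

async function previewFileChunks(fileId: string) {
  const body: PostPreviewFilesChunksProps = {
    type: ImportDataSourceEnum.fileLocal, // or ImportDataSourceEnum.csvTable
    sourceId: fileId,                     // GridFS file id of the uploaded file
    chunkSize: 512,                       // must stay below the 30000 guard above
    overlapRatio: 0.2,                    // overlap used for the preview split
    customSplitChar: undefined            // optional custom split character
  };
  // The handler responds with at most 5 { q, a } chunks.
  return POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', body);
}
```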

View File

@ -32,7 +32,6 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput';
import dayjs from 'dayjs';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useLoading } from '@fastgpt/web/hooks/useLoading';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@/components/MyMenu';
@ -62,11 +61,11 @@ import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSyncResultEnum } from '@fastgpt/global/core/dataset/constants';
import MyBox from '@/components/common/MyBox';
import { ImportDataSourceEnum } from './Import';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
const FileSourceSelector = dynamic(() => import('./Import/sourceSelector/FileSourceSelector'), {});
const FileSourceSelector = dynamic(() => import('./Import/components/FileSourceSelector'), {});
const CollectionCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
@ -76,14 +75,14 @@ const CollectionCard = () => {
const { toast } = useToast();
const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string };
const { t } = useTranslation();
const { Loading } = useLoading();
const { isPc } = useSystemStore();
const { userInfo } = useUserStore();
const [searchText, setSearchText] = useState('');
const { datasetDetail, updateDataset, startWebsiteSync, loadDatasetDetail } = useDatasetStore();
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
content: t('dataset.Confirm to delete the file')
content: t('dataset.Confirm to delete the file'),
type: 'delete'
});
const { openConfirm: openSyncConfirm, ConfirmModal: ConfirmSyncModal } = useConfirm({
content: t('core.dataset.collection.Start Sync Tip')
@ -452,7 +451,7 @@ const CollectionCard = () => {
query: {
...router.query,
currentTab: TabEnum.import,
source: ImportDataSourceEnum.tableLocal
source: ImportDataSourceEnum.csvTable
}
})
}

View File

@ -1,6 +1,5 @@
import React, { useContext, useCallback, createContext, useState, useMemo, useEffect } from 'react';
import React, { useContext, createContext, useState, useMemo, useEffect } from 'react';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
@ -8,6 +7,7 @@ import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
type ChunkSizeFieldType = 'embeddingChunkSize';
export type FormType = {
@ -29,14 +29,11 @@ type useImportStoreType = {
showPromptInput: boolean;
sources: ImportSourceItemType[];
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showRePreview: boolean;
totalChunkChars: number;
totalChunks: number;
chunkSize: number;
predictPoints: number;
chunkOverlapRatio: number;
priceTip: string;
uploadRate: number;
splitSources2Chunks: () => void;
importSource: `${ImportDataSourceEnum}`;
};
const StateContext = createContext<useImportStoreType>({
processParamsForm: {} as any,
@ -49,23 +46,22 @@ const StateContext = createContext<useImportStoreType>({
showChunkInput: false,
showPromptInput: false,
chunkSizeField: 'embeddingChunkSize',
showRePreview: false,
totalChunkChars: 0,
totalChunks: 0,
chunkSize: 0,
predictPoints: 0,
chunkOverlapRatio: 0,
priceTip: '',
uploadRate: 50,
splitSources2Chunks: () => {}
importSource: ImportDataSourceEnum.fileLocal
});
export const useImportStore = () => useContext(StateContext);
const Provider = ({
importSource,
dataset,
parentId,
children
}: {
importSource: `${ImportDataSourceEnum}`;
dataset: DatasetItemType;
parentId?: string;
children: React.ReactNode;
@ -86,7 +82,6 @@ const Provider = ({
const { t } = useTranslation();
const [sources, setSources] = useState<ImportSourceItemType[]>([]);
const [showRePreview, setShowRePreview] = useState(false);
// watch form
const mode = processParamsForm.watch('mode');
@ -154,68 +149,15 @@ const Provider = ({
const chunkSize = wayStaticPrams[way].chunkSize;
useEffect(() => {
setShowRePreview(true);
}, [mode, way, chunkSize, customSplitChar]);
const totalChunkChars = useMemo(
() => sources.reduce((sum, file) => sum + file.chunkChars, 0),
[sources]
);
const predictPoints = useMemo(() => {
const totalTokensPredict = totalChunkChars / 1000;
if (mode === TrainingModeEnum.auto) {
const price = totalTokensPredict * 1.3 * agentModel.charsPointsPrice;
return +price.toFixed(2);
}
if (mode === TrainingModeEnum.qa) {
const price = totalTokensPredict * 1.2 * agentModel.charsPointsPrice;
return +price.toFixed(2);
}
return +(totalTokensPredict * vectorModel.charsPointsPrice).toFixed(2);
}, [agentModel.charsPointsPrice, mode, totalChunkChars, vectorModel.charsPointsPrice]);
const totalChunks = useMemo(
() => sources.reduce((sum, file) => sum + file.chunks.length, 0),
[sources]
);
const splitSources2Chunks = useCallback(() => {
setSources((state) =>
state.map((file) => {
const { chunks, chars } = splitText2Chunks({
text: file.rawText,
chunkLen: chunkSize,
overlapRatio: selectModelStaticParam.chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return {
...file,
chunkChars: chars,
chunks: chunks.map((chunk, i) => ({
chunkIndex: i,
q: chunk,
a: ''
}))
};
})
);
setShowRePreview(false);
}, [chunkSize, customSplitChar, selectModelStaticParam.chunkOverlapRatio]);
const value = {
const value: useImportStoreType = {
parentId,
processParamsForm,
...selectModelStaticParam,
sources,
setSources,
showRePreview,
totalChunkChars,
totalChunks,
chunkSize,
predictPoints,
splitSources2Chunks
importSource
};
return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
};
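After this simplification, step components read the shared import state through the `useImportStore` hook. A minimal sketch — the component name is illustrative, the field names come from `useImportStoreType` above:

```tsx
// Sketch: consuming the trimmed-down import store inside a step component.
import { useImportStore } from '../Provider';

function StepSummary() {
  const { importSource, sources, chunkSize } = useImportStore();
  return <span>{`${importSource}: ${sources.length} sources, chunkSize ${chunkSize}`}</span>;
}
```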

View File

@ -1,4 +1,4 @@
import React, { useEffect, useMemo, useRef, useState } from 'react';
import React, { useMemo, useRef, useState } from 'react';
import {
Box,
Flex,
@ -21,11 +21,11 @@ import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider';
import Tag from '@/components/Tag';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import Preview from '../components/Preview';
import Tag from '@/components/Tag';
function DataProcess({
showPreviewChunks = true,
@ -38,17 +38,11 @@ function DataProcess({
const { feConfigs } = useSystemStore();
const {
processParamsForm,
sources,
chunkSizeField,
minChunkSize,
showChunkInput,
showPromptInput,
maxChunkSize,
totalChunkChars,
totalChunks,
predictPoints,
showRePreview,
splitSources2Chunks,
priceTip
} = useImportStore();
const { getValues, setValue, register } = processParamsForm;
@ -69,16 +63,10 @@ function DataProcess({
});
}, [feConfigs?.isPlus]);
useEffect(() => {
if (showPreviewChunks) {
splitSources2Chunks();
}
}, []);
return (
<Box h={'100%'} display={['block', 'flex']} gap={5}>
<Box flex={'1 0 0'} maxW={'600px'}>
<Flex fontWeight={'bold'} alignItems={'center'}>
<Box flex={'1 0 0'} minW={['auto', '540px']} maxW={'600px'}>
<Flex alignItems={'center'}>
<MyIcon name={'common/settingLight'} w={'20px'} />
<Box fontSize={'lg'}>{t('core.dataset.import.Data process params')}</Box>
</Flex>
@ -273,34 +261,18 @@ function DataProcess({
}}
></LeftRadio>
</Flex>
{showPreviewChunks && (
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.dataset.Total chunks', { total: totalChunks })}
</Tag>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.Total chars', { total: totalChunkChars })}
</Tag>
{feConfigs?.show_pay && (
<MyTooltip label={priceTip}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.dataset.import.Estimated points', { points: predictPoints })}
</Tag>
</MyTooltip>
)}
</Flex>
)}
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
{showPreviewChunks && showRePreview && (
<Button variant={'primaryOutline'} onClick={splitSources2Chunks}>
{t('core.dataset.import.Re Preview')}
</Button>
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
{feConfigs?.show_pay && (
<MyTooltip label={priceTip}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{priceTip}
</Tag>
</MyTooltip>
)}
</Flex>
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
<Button
onClick={() => {
if (showRePreview) {
splitSources2Chunks();
}
goToNext();
}}
>
@ -308,7 +280,9 @@ function DataProcess({
</Button>
</Flex>
</Box>
<Preview sources={sources} showPreviewChunks={showPreviewChunks} />
<Box flex={'1 0 0'} w={'0'}>
<Preview showPreviewChunks={showPreviewChunks} />
</Box>
{isOpenCustomPrompt && (
<PromptTextarea

View File

@ -1,5 +1,4 @@
import React from 'react';
import { useImportStore } from '../Provider';
import Preview from '../components/Preview';
import { Box, Button, Flex } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
@ -12,12 +11,11 @@ const PreviewData = ({
goToNext: () => void;
}) => {
const { t } = useTranslation();
const { sources, setSources } = useImportStore();
return (
<Flex flexDirection={'column'} h={'100%'}>
<Box flex={'1 0 0 '}>
<Preview showPreviewChunks={showPreviewChunks} sources={sources} />
<Preview showPreviewChunks={showPreviewChunks} />
</Box>
<Flex mt={2} justifyContent={'flex-end'}>
<Button onClick={goToNext}>{t('common.Next Step')}</Button>

View File

@ -1,4 +1,4 @@
import React, { useEffect, useState } from 'react';
import React from 'react';
import {
Box,
TableContainer,
@ -8,164 +8,109 @@ import {
Th,
Td,
Tbody,
Progress,
Flex,
Button
} from '@chakra-ui/react';
import { useImportStore, type FormType } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { chunksUpload, fileCollectionCreate } from '@/web/core/dataset/utils';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import { postCreateDatasetLinkCollection, postDatasetCollection } from '@/web/core/dataset/api';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { checkTeamDatasetSizeLimit } from '@/web/support/user/team/api';
import {
postCreateDatasetCsvTableCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
} from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import Tag from '@/components/Tag';
const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const Upload = () => {
const { t } = useTranslation();
const { toast } = useToast();
const router = useRouter();
const { datasetDetail } = useDatasetStore();
const { parentId, sources, processParamsForm, chunkSize, totalChunks, uploadRate } =
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
useImportStore();
const [uploadList, setUploadList] = useState<
(ImportSourceItemType & {
uploadedFileRate: number;
uploadedChunksRate: number;
})[]
>([]);
const { handleSubmit } = processParamsForm;
const { mutate: startUpload, isLoading } = useRequest({
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => {
if (uploadList.length === 0) return;
await checkTeamDatasetSizeLimit(totalChunks);
let totalInsertion = 0;
if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
// Batch create collection and upload chunks
for await (const item of uploadList) {
// create collection
const collectionId = await (async () => {
const commonParams = {
parentId,
trainingType: mode,
datasetId: datasetDetail._id,
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
name: item.sourceName,
rawTextLength: item.rawText.length,
hashRawText: hashStr(item.rawText)
};
if (item.file) {
return fileCollectionCreate({
file: item.file,
data: {
...commonParams,
collectionMetadata: {
relatedImgId: item.id
for await (const item of filterWaitingSources) {
setSources((state) =>
state.map((source) =>
source.id === item.id
? {
...source,
createStatus: 'creating'
}
},
percentListen: (e) => {
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedFileRate: e
}
: uploadItem
)
);
}
});
} else if (item.link) {
const { collectionId } = await postCreateDatasetLinkCollection({
...commonParams,
link: item.link,
metadata: {
webPageSelector: webSelector
}
});
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedFileRate: 100
}
: uploadItem
)
);
return collectionId;
} else if (item.rawText) {
// manual collection
return postDatasetCollection({
...commonParams,
type: DatasetCollectionTypeEnum.virtual
});
}
return '';
})();
: source
)
);
if (!collectionId) continue;
if (item.link) continue;
// create collection
const commonParams = {
parentId,
trainingType: mode,
datasetId: datasetDetail._id,
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
const billId = await postCreateTrainingUsage({
name: item.sourceName,
datasetId: datasetDetail._id
});
name: item.sourceName
};
if (importSource === ImportDataSourceEnum.fileLocal && item.dbFileId) {
await postCreateDatasetFileCollection({
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.fileLink && item.link) {
await postCreateDatasetLinkCollection({
...commonParams,
link: item.link,
metadata: {
webPageSelector: webSelector
}
});
} else if (importSource === ImportDataSourceEnum.fileCustom && item.rawText) {
// manual collection
await postCreateDatasetTextCollection({
...commonParams,
text: item.rawText
});
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
await postCreateDatasetCsvTableCollection({
...commonParams,
fileId: item.dbFileId
});
}
// upload chunks
const chunks = item.chunks;
const { insertLen } = await chunksUpload({
collectionId,
billId,
trainingMode: mode,
chunks,
rate: uploadRate,
onUploading: (e) => {
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedChunksRate: e
}
: uploadItem
)
);
},
prompt: qaPrompt
});
totalInsertion += insertLen;
setSources((state) =>
state.map((source) =>
source.id === item.id
? {
...source,
createStatus: 'finish'
}
: source
)
);
}
return totalInsertion;
},
onSuccess(num) {
if (showPreviewChunks) {
toast({
title: t('core.dataset.import.Import Success Tip', { num }),
status: 'success'
});
} else {
toast({
title: t('core.dataset.import.Upload success'),
status: 'success'
});
}
onSuccess() {
toast({
title: t('core.dataset.import.Import success'),
status: 'success'
});
// close import page
router.replace({
@ -175,21 +120,21 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
}
});
},
onError() {
setSources((state) =>
state.map((source) =>
source.createStatus === 'creating'
? {
...source,
createStatus: 'waiting'
}
: source
)
);
},
errorToast: t('common.file.Upload failed')
});
useEffect(() => {
setUploadList(
sources.map((item) => {
return {
...item,
uploadedFileRate: item.file ? 0 : -1,
uploadedChunksRate: 0
};
})
);
}, []);
return (
<Box>
<TableContainer>
@ -199,85 +144,35 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
<Th borderLeftRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
{t('core.dataset.import.Source name')}
</Th>
{showPreviewChunks ? (
<>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.Chunk amount')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload file progress')}
</Th>
<Th borderRightRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
{t('core.dataset.import.Data file progress')}
</Th>
</>
) : (
<>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload status')}
</Th>
</>
)}
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload status')}
</Th>
</Tr>
</Thead>
<Tbody>
{uploadList.map((item) => (
{sources.map((item) => (
<Tr key={item.id}>
<Td display={'flex'} alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
{item.sourceName}
<Td>
<Flex alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
<Box whiteSpace={'wrap'} maxW={'30vw'}>
{item.sourceName}
</Box>
</Flex>
</Td>
<Td>
<Box display={'inline-block'}>
{item.createStatus === 'waiting' && (
<Tag colorSchema={'gray'}>{t('common.Waiting')}</Tag>
)}
{item.createStatus === 'creating' && (
<Tag colorSchema={'blue'}>{t('common.Creating')}</Tag>
)}
{item.createStatus === 'finish' && (
<Tag colorSchema={'green'}>{t('common.Finish')}</Tag>
)}
</Box>
</Td>
{showPreviewChunks ? (
<>
<Td>{item.chunks.length}</Td>
<Td>
{item.uploadedFileRate === -1 ? (
'-'
) : (
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedFileRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={'blue'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedFileRate}%`}
</Flex>
)}
</Td>
<Td>
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedChunksRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={'purple'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedChunksRate}%`}
</Flex>
</Td>
</>
) : (
<>
<Td color={item.uploadedFileRate === 100 ? 'green.600' : 'myGray.600'}>
{item.uploadedFileRate === 100 ? t('common.Finish') : t('common.Waiting')}
</Td>
</>
)}
</Tr>
))}
</Tbody>
@ -286,8 +181,8 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
<Flex justifyContent={'flex-end'} mt={4}>
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
{uploadList.length > 0
? `${t('core.dataset.import.Total files', { total: uploadList.length })} | `
{sources.length > 0
? `${t('core.dataset.import.Total files', { total: sources.length })} | `
: ''}
{t('core.dataset.import.Start upload')}
</Button>

View File

@ -0,0 +1,296 @@
import MyBox from '@/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { DragEvent, useCallback, useMemo, useState } from 'react';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
export type SelectFileItemType = {
fileId: string;
folderPath: string;
file: File;
};
const FileSelector = ({
fileType,
selectFiles,
setSelectFiles,
onStartSelect,
onFinishSelect,
...props
}: {
fileType: string;
selectFiles: ImportSourceItemType[];
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
onStartSelect: () => void;
onFinishSelect: () => void;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { feConfigs } = useSystemStore();
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
const { File, onOpen } = useSelectFile({
fileType,
multiple: true,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const isMaxSelected = useMemo(
() => selectFiles.length >= maxCount,
[maxCount, selectFiles.length]
);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
onStartSelect();
setSelectFiles((state) => {
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
const { fileId, file } = selectFile;
return {
id: fileId,
createStatus: 'waiting',
file,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
isUploading: true,
uploadedFileRate: 0
};
});
const results = formatFiles.concat(state).slice(0, maxCount);
return results;
});
try {
// upload file
await Promise.all(
files.map(async ({ fileId, file }) => {
const uploadFileId = await uploadFile2DB({
file,
bucketName: BucketNameEnum.dataset,
percentListen: (e) => {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
uploadedFileRate: e
}
: item
)
);
}
});
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
dbFileId: uploadFileId,
isUploading: false
}
: item
)
);
})
);
} catch (error) {
console.log(error);
}
onFinishSelect();
}
}
});
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
if (selectFiles.length + files.length > maxCount) {
files = files.slice(0, maxCount - selectFiles.length);
toast({
status: 'warning',
title: t('common.file.Some file count exceeds limit', { maxCount })
});
}
// size check
if (!maxSize) {
return onSelectFile(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
if (filterFiles.length < files.length) {
toast({
status: 'warning',
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
});
}
return onSelectFile(filterFiles);
},
[maxCount, maxSize, onSelectFile, selectFiles.length, t, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const fileList: SelectFileItemType[] = [];
if (e.dataTransfer.items.length <= 1) {
const traverseFileTree = async (item: any) => {
return new Promise<void>((resolve, reject) => {
if (item.isFile) {
item.file((file: File) => {
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
if (filterTypeReg.test(file.name)) {
fileList.push({
fileId: getNanoid(),
folderPath,
file
});
}
resolve();
});
} else if (item.isDirectory) {
const dirReader = item.createReader();
dirReader.readEntries(async (entries: any[]) => {
for (let i = 0; i < entries.length; i++) {
await traverseFileTree(entries[i]);
}
resolve();
});
}
});
};
for await (const item of items) {
await traverseFileTree(item.webkitGetAsEntry());
}
} else {
const files = Array.from(e.dataTransfer.files);
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file.upload error description'),
status: 'error'
});
}
fileList.push(
...files
.filter((item) => filterTypeReg.test(item.name))
.map((file) => ({
fileId: getNanoid(),
folderPath: '',
file
}))
);
}
selectFileCallback(fileList.slice(0, maxCount));
};
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
{...(isMaxSelected
? {}
: {
cursor: 'pointer',
_hover: {
bg: 'primary.50',
borderColor: 'primary.600'
},
borderColor: isDragging ? 'primary.600' : 'borderColor.high',
onDragEnter: handleDragEnter,
onDragOver: (e) => e.preventDefault(),
onDragLeave: handleDragLeave,
onDrop: handleDrop,
onClick: onOpen
})}
{...props}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
{isMaxSelected ? (
<>
<Box color={'myGray.500'} fontSize={'xs'}>
</Box>
</>
) : (
<>
<Box fontWeight={'bold'}>
{isDragging
? t('file.Release the mouse to upload the file')
: t('common.file.Select and drag file tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('common.file.Support file type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('common.file.Support max count', { maxCount })}
{/* max size */}
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File
onSelect={(files) =>
selectFileCallback(
files.map((file) => ({
fileId: getNanoid(),
folderPath: '',
file
}))
)
}
/>
</>
)}
</MyBox>
);
};
export default React.memo(FileSelector);
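Note: a minimal usage sketch for the FileSelector above. The parent component, import path and callbacks are illustrative assumptions; only the prop shape and the accepted file-type string come from this commit.

// Hypothetical parent component (sketch only) showing how FileSelector is wired up.
import React, { useState } from 'react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import FileSelector from './FileSelector'; // import path is illustrative

const SelectFileDemo = () => {
  const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>([]);

  return (
    <FileSelector
      fileType={'.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx'}
      selectFiles={selectFiles}
      setSelectFiles={setSelectFiles}
      onStartSelect={() => console.log('upload started')}
      onFinishSelect={() => console.log('upload finished')}
    />
  );
};

export default SelectFileDemo;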

View File

@ -3,9 +3,9 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
import { ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import { ImportDataSourceEnum } from '..';
import { useRouter } from 'next/router';
import { TabEnum } from '../../..';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const FileModeSelector = ({ onClose }: { onClose: () => void }) => {
const { t } = useTranslation();

View File

@ -1,132 +1,94 @@
import React, { useMemo, useState } from 'react';
import { Box, Flex } from '@chakra-ui/react';
import React, { useState } from 'react';
import { Box, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import RowTabs from '@fastgpt/web/components/common/Tabs/RowTabs';
import { useImportStore } from '../Provider';
import MyMenu from '@/components/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
enum PreviewListEnum {
chunks = 'chunks',
sources = 'sources'
}
const Preview = ({
sources,
showPreviewChunks
}: {
sources: ImportSourceItemType[];
showPreviewChunks: boolean;
}) => {
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const { t } = useTranslation();
const [previewListType, setPreviewListType] = useState(
showPreviewChunks ? PreviewListEnum.chunks : PreviewListEnum.sources
);
const chunks = useMemo(() => {
const oneSourceChunkLength = Math.max(4, Math.floor(50 / sources.length));
return sources
.map((source) =>
source.chunks.slice(0, oneSourceChunkLength).map((chunk, i) => ({
...chunk,
index: i + 1,
sourceName: source.sourceName,
sourceIcon: source.icon
}))
)
.flat();
}, [sources]);
const { sources } = useImportStore();
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();
return (
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'} flex={'1 0 0'}>
<Box>
<RowTabs
list={[
...(showPreviewChunks
? [
{
icon: 'common/viewLight',
label: t('core.dataset.import.Preview chunks'),
value: PreviewListEnum.chunks
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'}>
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
<Box fontSize={'lg'}>{t('core.dataset.import.Sources list')}</Box>
</Flex>
<Box mt={3} flex={'1 0 0'} width={'100%'} overflow={'auto'}>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
mb={3}
alignItems={'center'}
>
<MyIcon name={source.icon as any} w={'16px'} />
<Box mx={1} flex={'1 0 0'} w={0} className="textEllipsis">
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box fontSize={'xs'} color={'myGray.600'}>
<MyMenu
Button={
<IconButton
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
aria-label={''}
size={'sm'}
variant={'whitePrimary'}
/>
}
]
: []),
{
icon: 'core/dataset/fileCollection',
label: t('core.dataset.import.Sources list'),
value: PreviewListEnum.sources
}
]}
value={previewListType}
onChange={(e) => setPreviewListType(e as PreviewListEnum)}
/>
</Box>
<Box mt={3} flex={'1 0 0'} overflow={'auto'}>
{previewListType === PreviewListEnum.chunks ? (
<>
{chunks.map((chunk, i) => (
<Box
key={i}
p={4}
bg={'white'}
mb={3}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
whiteSpace={'pre-wrap'}
>
<Flex mb={1} alignItems={'center'} fontSize={'sm'}>
<Box
flexShrink={0}
px={1}
color={'primary.600'}
borderWidth={'1px'}
borderColor={'primary.200'}
bg={'primary.50'}
borderRadius={'sm'}
>
# {chunk.index}
</Box>
<Flex ml={2} fontWeight={'bold'} alignItems={'center'} gap={1}>
<MyIcon name={chunk.sourceIcon as any} w={'14px'} />
{chunk.sourceName}
</Flex>
</Flex>
<Box fontSize={'xs'} whiteSpace={'pre-wrap'} wordBreak={'break-all'}>
<Box color={'myGray.900'}>{chunk.q}</Box>
<Box color={'myGray.500'}>{chunk.a}</Box>
</Box>
menuList={[
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
{t('core.dataset.import.Preview raw text')}
</Flex>
),
onClick: () => setPreviewRawTextSource(source)
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
{t('core.dataset.import.Preview chunks')}
</Flex>
),
onClick: () => setPreviewChunkSource(source)
}
]}
/>
</Box>
))}
</>
) : (
<>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
mb={3}
>
<MyIcon name={source.icon as any} w={'16px'} />
<Box mx={1} flex={'1 0 0'} className="textEllipsis">
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box>
{t('core.dataset.import.File chunk amount', { amount: source.chunks.length })}
</Box>
)}
</Flex>
))}
</>
)}
)}
</Flex>
))}
</Box>
{!!previewRawTextSource && (
<PreviewRawText
previewSource={previewRawTextSource}
onClose={() => setPreviewRawTextSource(undefined)}
/>
)}
{!!previewChunkSource && (
<PreviewChunks
previewSource={previewChunkSource}
onClose={() => setPreviewChunkSource(undefined)}
/>
)}
</Box>
);
};

View File

@ -0,0 +1,95 @@
import React, { useMemo } from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { getPreviewChunks } from '@/web/core/dataset/api';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
const PreviewChunks = ({
previewSource,
onClose
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useImportStore();
const { data = [], isLoading } = useQuery(
['previewSource'],
() => {
if (
importSource === ImportDataSourceEnum.fileLocal ||
importSource === ImportDataSourceEnum.csvTable ||
importSource === ImportDataSourceEnum.fileLink
) {
return getPreviewChunks({
type: importSource,
sourceId: previewSource.dbFileId || previewSource.link || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar')
});
} else if (importSource === ImportDataSourceEnum.fileCustom) {
const customSplitChar = processParamsForm.getValues('customSplitChar');
const { chunks } = splitText2Chunks({
text: previewSource.rawText || '',
chunkLen: chunkSize,
overlapRatio: chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return chunks.map((chunk) => ({
q: chunk,
a: ''
}));
}
return [];
},
{
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
return (
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
maxW={['90vw', '40vw']}
>
{data.map((item, index) => (
<Box
key={index}
whiteSpace={'pre-wrap'}
fontSize={'sm'}
p={4}
bg={index % 2 === 0 ? 'white' : 'myWhite.600'}
mb={3}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
_notLast={{
mb: 2
}}
>
<Box color={'myGray.900'}>{item.q}</Box>
<Box color={'myGray.500'}>{item.a}</Box>
</Box>
))}
</MyRightDrawer>
);
};
export default React.memo(PreviewChunks);
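Note: for the fileCustom branch above, chunking happens entirely on the client. A standalone sketch of that call follows; the text and parameter values are illustrative.

// Sketch: client-side chunking used for pasted custom text (values are illustrative).
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const { chunks } = splitText2Chunks({
  text: 'First paragraph.\n\nSecond paragraph.',
  chunkLen: 512,
  overlapRatio: 0.2,
  customReg: []
});
// Same shape the preview drawer renders: q holds the chunk text, a stays empty.
const previewData = chunks.map((chunk) => ({ q: chunk, a: '' }));
console.log(previewData);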

View File

@ -1,28 +1,73 @@
import React from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { ModalBody } from '@chakra-ui/react';
export type PreviewRawTextProps = {
icon: string;
title: string;
rawText: string;
};
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import { getPreviewFileContent } from '@/web/common/file/api';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
const PreviewRawText = ({
icon,
title,
rawText,
previewSource,
onClose
}: PreviewRawTextProps & {
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource } = useImportStore();
const { data, isLoading } = useQuery(
['previewSource', previewSource?.dbFileId],
() => {
if (importSource === ImportDataSourceEnum.fileLocal && previewSource.dbFileId) {
return getPreviewFileContent({
fileId: previewSource.dbFileId,
csvFormat: true
});
}
if (importSource === ImportDataSourceEnum.csvTable && previewSource.dbFileId) {
return getPreviewFileContent({
fileId: previewSource.dbFileId,
csvFormat: false
});
}
if (importSource === ImportDataSourceEnum.fileCustom) {
return {
previewContent: (previewSource.rawText || '').slice(0, 3000)
};
}
return {
previewContent: ''
};
},
{
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
const rawText = data?.previewContent || '';
return (
<MyModal isOpen onClose={onClose} iconSrc={icon} title={title}>
<ModalBody whiteSpace={'pre-wrap'} overflowY={'auto'}>
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
>
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} fontSize={'sm'}>
{rawText}
</ModalBody>
</MyModal>
</Box>
</MyRightDrawer>
);
};
export default PreviewRawText;
export default React.memo(PreviewRawText);
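Note: a standalone sketch of the preview request PreviewRawText makes for local files; the fileId value is illustrative.

// Sketch: fetching a raw-text preview for an uploaded file, as the fileLocal branch does.
import { getPreviewFileContent } from '@/web/common/file/api';

async function previewFile(fileId: string) {
  const { previewContent, totalLength } = await getPreviewFileContent({
    fileId,
    csvFormat: true
  });
  console.log(totalLength, previewContent.slice(0, 200));
}

previewFile('mongo-gridfs-file-id'); // illustrative id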

View File

@ -0,0 +1,119 @@
import React, { useState } from 'react';
import {
Flex,
TableContainer,
Table,
Thead,
Tr,
Th,
Td,
Tbody,
Progress,
IconButton
} from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@/components/MyTooltip';
import dynamic from 'next/dynamic';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
export const RenderUploadFiles = ({
files,
setFiles,
showPreviewContent
}: {
files: ImportSourceItemType[];
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showPreviewContent?: boolean;
}) => {
const { t } = useTranslation();
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
return files.length > 0 ? (
<>
<TableContainer mt={5}>
<Table variant={'simple'} fontSize={'sm'} draggable={false}>
<Thead draggable={false}>
<Tr bg={'myGray.100'} mb={2}>
<Th borderLeftRadius={'md'} borderBottom={'none'} py={4}>
{t('common.file.File Name')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload file progress')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('common.file.File Size')}
</Th>
<Th borderRightRadius={'md'} borderBottom={'none'} py={4}>
{t('common.Action')}
</Th>
</Tr>
</Thead>
<Tbody>
{files.map((item) => (
<Tr key={item.id}>
<Td>
<Flex alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
{item.sourceName}
</Flex>
</Td>
<Td>
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedFileRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={(item.uploadedFileRate || 0) >= 100 ? 'green' : 'blue'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedFileRate}%`}
</Flex>
</Td>
<Td>{item.sourceSize}</Td>
<Td>
{!item.isUploading && (
<Flex alignItems={'center'} gap={4}>
{showPreviewContent && (
<MyTooltip label={t('core.dataset.import.Preview raw text')}>
<IconButton
variant={'whitePrimary'}
size={'sm'}
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
aria-label={''}
onClick={() => setPreviewFile(item)}
/>
</MyTooltip>
)}
<IconButton
variant={'grayDanger'}
size={'sm'}
icon={<MyIcon name={'delete'} w={'14px'} />}
aria-label={''}
onClick={() => {
setFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
)}
</Td>
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
{!!previewFile && (
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
)}
</>
) : null;
};

View File

@ -1,4 +1,4 @@
import React, { useEffect } from 'react';
import React, { useCallback, useEffect } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import dynamic from 'next/dynamic';
@ -19,7 +19,7 @@ const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
<>
{activeStep === 0 && <CustomTextInput goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
};
@ -36,6 +36,24 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
}
});
const onSubmit = useCallback(
(data: { name: string; value: string }) => {
const fileId = getNanoid(32);
setSources([
{
id: fileId,
createStatus: 'waiting',
rawText: data.value,
sourceName: data.name,
icon: 'file/fill/manual'
}
]);
goToNext();
},
[goToNext, setSources]
);
useEffect(() => {
const source = sources[0];
if (source) {
@ -78,25 +96,7 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
/>
</Box>
<Flex mt={5} justifyContent={'flex-end'}>
<Button
onClick={handleSubmit((data) => {
const fileId = getNanoid(32);
setSources([
{
id: fileId,
rawText: data.value,
chunks: [],
chunkChars: 0,
sourceName: data.name,
icon: 'file/fill/manual'
}
]);
goToNext();
})}
>
{t('common.Next Step')}
</Button>
<Button onClick={handleSubmit((data) => onSubmit(data))}>{t('common.Next Step')}</Button>
</Flex>
</Box>
);

View File

@ -23,7 +23,7 @@ const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
<>
{activeStep === 0 && <CustomLinkImport goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks={false} />}
{activeStep === 2 && <Upload />}
</>
);
};
@ -128,10 +128,8 @@ const CustomLinkImport = ({ goToNext }: { goToNext: () => void }) => {
setSources(
newLinkList.map((link) => ({
id: getNanoid(32),
createStatus: 'waiting',
link,
rawText: '',
chunks: [],
chunkChars: 0,
sourceName: link,
icon: LinkCollectionIcon
}))

View File

@ -1,41 +1,27 @@
import React, { useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import { Box, Button, Flex } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import React, { useCallback, useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { readFileRawContent } from '@fastgpt/web/common/file/read';
import { getUploadBase64ImgController } from '@/web/common/file/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import MyTooltip from '@/components/MyTooltip';
import type { PreviewRawTextProps } from '../components/PreviewRawText';
import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { RenderUploadFiles } from '../components/RenderFiles';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
});
const Upload = dynamic(() => import('../commonProgress/Upload'));
const PreviewRawText = dynamic(() => import('../components/PreviewRawText'));
type FileItemType = ImportSourceItemType & { file: File };
const fileType = '.txt, .docx, .csv, .pdf, .md, .html';
const maxSelectFileCount = 1000;
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
return (
<>
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
};
@ -44,135 +30,47 @@ export default React.memo(FileLocal);
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources } = useImportStore();
// @ts-ignore
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
const [previewRaw, setPreviewRaw] = useState<PreviewRawTextProps>();
useEffect(() => {
setSources(successFiles);
}, [successFiles]);
}, [setSources, successFiles]);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
for await (const selectFile of files) {
const { file, folderPath } = selectFile;
const relatedId = getNanoid(32);
const { rawText } = await (() => {
try {
return readFileRawContent({
file,
uploadBase64Controller: (base64Img) =>
getUploadBase64ImgController({
base64Img,
type: MongoImageTypeEnum.collectionImage,
metadata: {
relatedId
}
})
});
} catch (error) {
return { rawText: '' };
}
})();
const item: FileItemType = {
id: relatedId,
file,
rawText,
chunks: [],
chunkChars: 0,
sourceFolderPath: folderPath,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
errorMsg: rawText.length === 0 ? t('common.file.Empty file tip') : ''
};
setSelectFiles((state) => {
const results = [item].concat(state).slice(0, maxSelectFileCount);
return results;
});
}
}
}
});
const onclickNext = useCallback(() => {
// filter uploaded files
setSelectFiles((state) => state.filter((item) => (item.uploadedFileRate || 0) >= 100));
goToNext();
}, [goToNext]);
return (
<Box>
<FileSelector
isLoading={isLoading}
fileType={fileType}
multiple
maxCount={maxSelectFileCount}
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
onSelectFile={onSelectFile}
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/>
{/* render files */}
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
{selectFiles.map((item) => (
<MyTooltip key={item.id} label={t('core.dataset.import.Preview raw text')}>
<Flex
alignItems={'center'}
px={4}
py={3}
borderRadius={'md'}
bg={'myGray.100'}
cursor={'pointer'}
onClick={() =>
setPreviewRaw({
icon: item.icon,
title: item.sourceName,
rawText: item.rawText.slice(0, 10000)
})
}
>
<MyIcon name={item.icon as any} w={'16px'} />
<Box ml={1} mr={3}>
{item.sourceName}
</Box>
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
{item.sourceSize}
{item.rawText.length > 0 && (
<>,{t('common.Number of words', { amount: item.rawText.length })}</>
)}
</Box>
{item.errorMsg && (
<MyTooltip label={item.errorMsg}>
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
</MyTooltip>
)}
<MyIcon
name={'common/closeLight'}
w={'14px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={(e) => {
e.stopPropagation();
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
</MyTooltip>
))}
</Flex>
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
<Box textAlign={'right'}>
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
<Box textAlign={'right'} mt={5}>
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
: ''}
{t('common.Next Step')}
</Button>
</Box>
{previewRaw && <PreviewRawText {...previewRaw} onClose={() => setPreviewRaw(undefined)} />}
</Box>
);
});

View File

@ -1,108 +1,62 @@
import React, { useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import { Box, Button, Flex } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic';
import { fileDownload } from '@/web/common/file/utils';
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
import { RenderUploadFiles } from '../components/RenderFiles';
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));
type FileItemType = ImportSourceItemType & { file: File };
const fileType = '.csv';
const maxSelectFileCount = 1000;
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
return (
<>
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
};
export default React.memo(FileLocal);
const csvTemplate = `index,content
"必填内容","可选内容。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
const csvTemplate = `"第一列内容","第二列内容"
"必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
"只会讲第一和第二列内容导入,其余列会被忽略",""
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources } = useImportStore();
// @ts-ignore
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
useEffect(() => {
setSources(successFiles);
}, [successFiles]);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
for await (const selectFile of files) {
const { file, folderPath } = selectFile;
const { header, data } = await readCsvContent({ file });
const filterData: FileItemType['chunks'] = data
.filter((item) => item[0])
.map((item) => ({
q: item[0] || '',
a: item[1] || '',
chunkIndex: 0
}));
const item: FileItemType = {
id: getNanoid(32),
file,
rawText: '',
chunks: filterData,
chunkChars: 0,
sourceFolderPath: folderPath,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
errorMsg:
header[0] !== 'index' || header[1] !== 'content' || filterData.length === 0
? t('core.dataset.import.Csv format error')
: ''
};
setSelectFiles((state) => {
const results = [item].concat(state).slice(0, 10);
return results;
});
}
}
},
errorToast: t('common.file.Select failed')
});
return (
<Box>
<FileSelector
multiple
maxCount={maxSelectFileCount}
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
isLoading={isLoading}
fileType={fileType}
onSelectFile={onSelectFile}
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/>
<Box
@ -122,43 +76,16 @@ const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () =
</Box>
{/* render files */}
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
{selectFiles.map((item) => (
<Flex
key={item.id}
alignItems={'center'}
px={4}
py={2}
borderRadius={'md'}
bg={'myGray.100'}
>
<MyIcon name={item.icon as any} w={'16px'} />
<Box ml={1} mr={3}>
{item.sourceName}
</Box>
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
{item.sourceSize}
</Box>
{item.errorMsg && (
<MyTooltip label={item.errorMsg}>
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
</MyTooltip>
)}
<MyIcon
name={'common/closeLight'}
w={'14px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={() => {
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
))}
</Flex>
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
<Box textAlign={'right'}>
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
<Box textAlign={'right'} mt={5}>
<Button
isDisabled={successFiles.length === 0 || uploading}
onClick={() => {
setSelectFiles((state) => state.filter((item) => !item.errorMsg));
goToNext();
}}
>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
: ''}

View File

@ -6,22 +6,15 @@ import { useRouter } from 'next/router';
import { TabEnum } from '../../index';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import dynamic from 'next/dynamic';
import Provider from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import Provider from './Provider';
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
const FileLink = dynamic(() => import('./diffSource/FileLink'));
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
}
const ImportDataset = () => {
const { t } = useTranslation();
const router = useRouter();
@ -65,7 +58,7 @@ const ImportDataset = () => {
title: t('core.dataset.import.Upload data')
}
],
[ImportDataSourceEnum.tableLocal]: [
[ImportDataSourceEnum.csvTable]: [
{
title: t('core.dataset.import.Select file')
},
@ -88,7 +81,7 @@ const ImportDataset = () => {
if (source === ImportDataSourceEnum.fileLocal) return FileLocal;
if (source === ImportDataSourceEnum.fileLink) return FileLink;
if (source === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (source === ImportDataSourceEnum.tableLocal) return TableLocal;
if (source === ImportDataSourceEnum.csvTable) return TableLocal;
}, [source]);
return ImportComponent ? (
@ -142,7 +135,7 @@ const ImportDataset = () => {
<MyStep />
</Box>
</Box>
<Provider dataset={datasetDetail} parentId={parentId}>
<Provider dataset={datasetDetail} parentId={parentId} importSource={source}>
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
<ImportComponent activeStep={activeStep} goToNext={goToNext} />
</Box>

View File

@ -0,0 +1,7 @@
import { ImportSourceItemType } from '@/web/core/dataset/type';
export type UploadFileItemType = ImportSourceItemType & {
file?: File;
isUploading: boolean;
uploadedFileRate: number;
};

View File

@ -1,19 +1,5 @@
import React, { useEffect, useMemo, useState } from 'react';
import {
Box,
Textarea,
Button,
Flex,
useTheme,
useDisclosure,
Table,
Thead,
Tbody,
Tr,
Th,
Td,
TableContainer
} from '@chakra-ui/react';
import { Box, Textarea, Button, Flex, useTheme, useDisclosure } from '@chakra-ui/react';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest';
import { postSearchText } from '@/web/core/dataset/api';
@ -36,10 +22,7 @@ import { useForm } from 'react-hook-form';
import MySelect from '@fastgpt/web/components/common/MySelect';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { fileDownload } from '@/web/common/file/utils';
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
import { delay } from '@fastgpt/global/common/system/utils';
import QuoteItem from '@/components/core/dataset/QuoteItem';
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import SearchParamsTip from '@/components/core/dataset/SearchParamsTip';
@ -134,34 +117,6 @@ const Test = ({ datasetId }: { datasetId: string }) => {
});
}
});
// const { mutate: onFileTest, isLoading: fileTestIsLoading } = useRequest({
// mutationFn: async ({ searchParams }: FormType) => {
// if (!selectFile) return Promise.reject('File is not selected');
// const { data } = await readCsvContent({ file: selectFile });
// const testList = data.slice(0, 100);
// const results: SearchTestResponse[] = [];
// for await (const item of testList) {
// try {
// const result = await postSearchText({ datasetId, text: item[0].trim(), ...searchParams });
// results.push(result);
// } catch (error) {
// await delay(500);
// }
// }
// return results;
// },
// onSuccess(res: SearchTestResponse[]) {
// console.log(res);
// },
// onError(err) {
// toast({
// title: getErrText(err),
// status: 'error'
// });
// }
// });
const onSelectFile = async (files: File[]) => {
const file = files[0];

View File

@ -101,7 +101,9 @@ const Standard = ({
{t('support.wallet.subscription.Sub plan')}
</Box>
<Box mt={8} mb={10} color={'myGray.500'} fontSize={'lg'}>
{t('support.wallet.subscription.Sub plan tip')}
{t('support.wallet.subscription.Sub plan tip', {
title: feConfigs?.systemTitle
})}
</Box>
<Box>
<RowTabs

View File

@ -13,7 +13,7 @@ import { checkTeamAiPointsAndLock } from './utils';
import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils';
import { addMinutes } from 'date-fns';
import { countGptMessagesTokens } from '@fastgpt/global/common/string/tiktoken';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@ -128,7 +128,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
});
// get vector and insert
const { insertLen } = await pushDataListToTrainingQueue({
const { insertLen } = await pushDataListToTrainingQueueByCollectionId({
teamId: data.teamId,
tmbId: data.tmbId,
collectionId: data.collectionId,

View File

@ -1,4 +1,4 @@
import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
import { GET, POST } from '@/web/common/api/request';
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
import { AxiosProgressEvent } from 'axios';
@ -8,10 +8,16 @@ export const postUploadFiles = (
data: FormData,
onUploadProgress: (progressEvent: AxiosProgressEvent) => void
) =>
POST<string[]>('/common/file/upload', data, {
POST<string>('/common/file/upload', data, {
timeout: 480000,
onUploadProgress,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
export const getPreviewFileContent = (data: { fileId: string; csvFormat: boolean }) =>
POST<{
previewContent: string;
totalLength: number;
}>('/common/file/previewContent', data);

View File

@ -7,13 +7,13 @@ import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/fi
/**
* upload file to mongo gridfs
*/
export const uploadFiles = ({
files,
export const uploadFile2DB = ({
file,
bucketName,
metadata = {},
percentListen
}: {
files: File[];
file: File;
bucketName: `${BucketNameEnum}`;
metadata?: Record<string, any>;
percentListen?: (percent: number) => void;
@ -21,9 +21,7 @@ export const uploadFiles = ({
const form = new FormData();
form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', bucketName);
files.forEach((file) => {
form.append('file', file, encodeURIComponent(file.name));
});
form.append('file', file, encodeURIComponent(file.name));
return postUploadFiles(form, (e) => {
if (!e.total) return;
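Note: a minimal call sketch for the single-file uploadFile2DB above; the surrounding function and log output are illustrative.

// Sketch: upload one browser File to GridFS and track progress; resolves to the stored file id.
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';

async function upload(file: File) {
  const fileId = await uploadFile2DB({
    file,
    bucketName: BucketNameEnum.dataset,
    percentListen: (percent) => console.log(`uploaded ${percent}%`)
  });
  return fileId;
}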

View File

@ -23,14 +23,18 @@ export const useSelectFile = (props?: {
accept={fileType}
multiple={multiple}
onChange={(e) => {
if (!e.target.files || e.target.files?.length === 0) return;
if (e.target.files.length > maxCount) {
return toast({
const files = e.target.files;
if (!files || files?.length === 0) return;
let fileList = Array.from(files);
if (fileList.length > maxCount) {
toast({
status: 'warning',
title: t('common.file.Select file amount limit', { max: maxCount })
});
fileList = fileList.slice(0, maxCount);
}
onSelect(Array.from(e.target.files), openSign.current);
onSelect(fileList, openSign.current);
}}
/>
</Box>

View File

@ -77,15 +77,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
let options = {};
if (MediaRecorder.isTypeSupported('audio/webm')) {
options = { type: 'audio/webm' };
} else if (MediaRecorder.isTypeSupported('video/mp4')) {
options = { type: 'video/mp4' };
} else if (MediaRecorder.isTypeSupported('video/mp3')) {
options = { type: 'video/mp3' };
} else {
console.error('no suitable mimetype found for this device');
}
const blob = new Blob(chunks, options);
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
formData.append('file', blob, 'recording.mp4');
formData.append('file', blob, 'recording.mp3');
formData.append(
'data',
JSON.stringify({

View File

@ -8,13 +8,19 @@ import type {
} from '@/global/core/api/datasetReq.d';
import type {
CreateDatasetCollectionParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody,
FileIdCreateDatasetCollectionParams,
LinkCreateDatasetCollectionParams,
PostWebsiteSyncParams
PostWebsiteSyncParams,
TextCreateDatasetCollectionParams
} from '@fastgpt/global/core/dataset/api.d';
import type {
GetTrainingQueueProps,
GetTrainingQueueResponse,
PostPreviewFilesChunksProps,
PostPreviewFilesChunksResponse,
PostPreviewTableChunksResponse,
SearchTestProps,
SearchTestResponse
} from '@/global/core/dataset/api.d';
@ -23,10 +29,7 @@ import type {
CreateDatasetParams,
InsertOneDatasetDataProps
} from '@/global/core/dataset/api.d';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import {
DatasetCollectionSyncResultEnum,
@ -75,8 +78,14 @@ export const getDatasetCollectionById = (id: string) =>
GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data);
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data);
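Note: together with uploadFile2DB, these endpoints support a two-step import flow: upload the file first, then create the collection from the returned fileId. The sketch below assumes that flow; datasetId/fileId values are illustrative, and any further fields required by FileIdCreateDatasetCollectionParams are not shown in this commit.

// Sketch only: create a dataset collection from an already-uploaded file id.
import { postCreateDatasetFileCollection } from '@/web/core/dataset/api';

async function createCollectionFromFile(datasetId: string, fileId: string) {
  const { collectionId } = await postCreateDatasetFileCollection({
    datasetId,
    fileId
  } as any); // cast because the full param type is not visible in this diff
  return collectionId;
}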
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
POST(`/core/dataset/collection/update`, data);
@ -95,12 +104,6 @@ export const getDatasetDataList = (data: GetDatasetDataListProps) =>
export const getDatasetDataItemById = (id: string) =>
GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id });
/**
* push data to training queue
*/
export const postChunks2Dataset = (data: PushDatasetDataProps) =>
POST<PushDatasetDataResponse>(`/core/dataset/data/pushData`, data);
/**
* insert one data to dataset (immediately insert)
*/
@ -122,6 +125,8 @@ export const delOneDatasetDataById = (id: string) =>
/* get length of system training queue */
export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);
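Note: a call sketch for the new preview endpoint above; the source id and chunk settings are illustrative.

// Sketch: server-side chunk preview for an uploaded file or link source.
import { getPreviewChunks } from '@/web/core/dataset/api';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

async function previewChunks(fileId: string) {
  const chunks = await getPreviewChunks({
    type: ImportDataSourceEnum.fileLocal,
    sourceId: fileId,
    chunkSize: 512,
    overlapRatio: 0.2,
    customSplitChar: ''
  });
  return chunks; // array of { q, a }
}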
/* ================== file ======================== */
export const getFileViewUrl = (fileId: string) =>

View File

@ -1,200 +0,0 @@
import MyBox from '@/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { DragEvent, useCallback, useState } from 'react';
export type SelectFileItemType = {
folderPath: string;
file: File;
};
const FileSelector = ({
fileType,
multiple,
maxCount,
maxSize,
isLoading,
onSelectFile,
...props
}: {
fileType: string;
multiple?: boolean;
maxCount?: number;
maxSize?: number;
isLoading?: boolean;
onSelectFile: (e: SelectFileItemType[]) => any;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { File, onOpen } = useSelectFile({
fileType,
multiple,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
// size check
if (!maxSize) {
return onSelectFile(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
if (filterFiles.length < files.length) {
toast({
status: 'warning',
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
});
}
return onSelectFile(filterFiles);
},
[maxSize, onSelectFile, t, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const fileList: SelectFileItemType[] = [];
if (e.dataTransfer.items.length <= 1) {
const traverseFileTree = async (item: any) => {
return new Promise<void>((resolve, reject) => {
if (item.isFile) {
item.file((file: File) => {
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
if (filterTypeReg.test(file.name)) {
fileList.push({
folderPath,
file
});
}
resolve();
});
} else if (item.isDirectory) {
const dirReader = item.createReader();
dirReader.readEntries(async (entries: any[]) => {
for (let i = 0; i < entries.length; i++) {
await traverseFileTree(entries[i]);
}
resolve();
});
}
});
};
for await (const item of items) {
await traverseFileTree(item.webkitGetAsEntry());
}
} else {
const files = Array.from(e.dataTransfer.files);
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file.upload error description'),
status: 'error'
});
}
fileList.push(
...files
.filter((item) => filterTypeReg.test(item.name))
.map((file) => ({
folderPath: '',
file
}))
);
}
selectFileCallback(fileList.slice(0, maxCount));
};
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
cursor={'pointer'}
_hover={{
bg: 'primary.50',
borderColor: 'primary.600'
}}
{...(isDragging
? {
borderColor: 'primary.600'
}
: {
borderColor: 'borderColor.high'
})}
{...props}
onDragEnter={handleDragEnter}
onDragOver={(e) => e.preventDefault()}
onDragLeave={handleDragLeave}
onDrop={handleDrop}
onClick={onOpen}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
<Box fontWeight={'bold'}>
{isDragging
? t('file.Release the mouse to upload the file')
: t('common.file.Select and drag file tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('common.file.Support file type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('common.file.Support max count', { maxCount })}
{/* max size */}
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File
onSelect={(files) =>
selectFileCallback(
files.map((file) => ({
folderPath: '',
file
}))
)
}
/>
</MyBox>
);
};
export default React.memo(FileSelector);

View File

@ -1,6 +1,6 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from './constants';
import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
import { UseFormReturn } from 'react-hook-form';
export type ImportDataComponentProps = {
@ -10,19 +10,27 @@ export type ImportDataComponentProps = {
export type ImportSourceItemType = {
id: string;
rawText: string;
chunks: PushDatasetDataChunkProps[];
chunkChars: number;
sourceFolderPath?: string;
sourceName: string;
sourceSize?: string;
icon: string;
createStatus: 'waiting' | 'creating' | 'finish';
metadata?: Record<string, any>;
errorMsg?: string;
// source
sourceName: string;
sourceSize?: string;
icon: string;
// file
isUploading?: boolean;
uploadedFileRate?: number;
dbFileId?: string; // file id stored in the database; this id is also used as the relatedId in the metadata of images and collections
file?: File;
// link
link?: string;
// custom text
rawText?: string;
};
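Note: an illustrative value for the reworked ImportSourceItemType; every field value below, including the icon key, is an assumption for demonstration only.

// Sketch: a local-file source item after its upload has finished (all values illustrative).
const exampleSource: ImportSourceItemType = {
  id: 'nanoid-32-chars',
  createStatus: 'waiting',
  sourceName: 'quarterly-report.pptx',
  sourceSize: '1.2 MB',
  icon: 'file/fill/ppt', // icon key is an assumption
  isUploading: false,
  uploadedFileRate: 100,
  dbFileId: 'gridfs-file-id'
};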
export type ImportSourceParamsType = UseFormReturn<

View File

@ -1,95 +1,5 @@
import { getFileViewUrl, postChunks2Dataset } from '@/web/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { delay } from '@fastgpt/global/common/system/utils';
import { getFileViewUrl } from '@/web/core/dataset/api';
import { strIsLink } from '@fastgpt/global/common/string/tools';
import type {
FileCreateDatasetCollectionParams,
PushDatasetDataChunkProps
} from '@fastgpt/global/core/dataset/api.d';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { POST } from '@/web/common/api/request';
/* upload a file to create collection */
export const fileCollectionCreate = ({
file,
metadata = {},
data,
percentListen
}: {
file: File;
metadata?: Record<string, any>;
data: FileCreateDatasetCollectionParams;
percentListen: (percent: number) => void;
}) => {
const form = new FormData();
form.append('data', JSON.stringify(data));
form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', BucketNameEnum.dataset);
form.append('file', file, encodeURIComponent(file.name));
return POST<string>(`/core/dataset/collection/create/file?datasetId=${data.datasetId}`, form, {
timeout: 480000,
onUploadProgress: (e) => {
if (!e.total) return;
const percent = Math.round((e.loaded / e.total) * 100);
percentListen && percentListen(percent);
},
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
};
export async function chunksUpload({
billId,
collectionId,
trainingMode,
chunks,
prompt,
rate = 50,
onUploading
}: {
billId: string;
collectionId: string;
trainingMode: `${TrainingModeEnum}`;
chunks: PushDatasetDataChunkProps[];
prompt?: string;
rate?: number;
onUploading?: (rate: number) => void;
}) {
async function upload(data: PushDatasetDataChunkProps[]) {
return postChunks2Dataset({
collectionId,
trainingMode,
data,
prompt,
billId
});
}
let successInsert = 0;
let retryTimes = 10;
for (let i = 0; i < chunks.length; i += rate) {
try {
const uploadChunks = chunks.slice(i, i + rate);
const { insertLen } = await upload(uploadChunks);
if (onUploading) {
onUploading(Math.round(((i + uploadChunks.length) / chunks.length) * 100));
}
successInsert += insertLen;
} catch (error) {
if (retryTimes === 0) {
return Promise.reject(error);
}
await delay(1000);
retryTimes--;
i -= rate;
}
}
return { insertLen: successInsert };
}
export async function getFileAndOpen(fileId: string) {
if (strIsLink(fileId)) {

View File

@ -3,9 +3,9 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
# please download the model from https://huggingface.co/BAAI/bge-reranker-base and put it in the same directory as Dockerfile
COPY ./bge-reranker-base ./bge-reranker-base
COPY requirement.txt .
COPY requirements.txt .
RUN python3 -m pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN python3 -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
COPY app.py Dockerfile .