Mirror of https://github.com/labring/FastGPT.git (synced 2025-12-25 20:02:47 +00:00)
Feat: pptx and xlsx loader (#1118)
Some checks failed
deploy-docs / deploy-production (push) Has been cancelled
Build docs images and copy image to docker hub / build-fastgpt-docs-images (push) Has been cancelled
Build FastGPT images in Personal warehouse / build-fastgpt-images (push) Has been cancelled
Build docs images and copy image to docker hub / update-docs-image (push) Has been cancelled
* perf: plan tip
* perf: upload size controller
* feat: add image ttl index
* feat: new upload file ux
* remove file
* feat: support read pptx
* feat: support xlsx
* fix: rerank docker file
This commit is contained in:
parent f9d266a6af
commit 21288d1736
@ -22,7 +22,7 @@ weight: 356

## How tools run

- To understand how a tool is allowed, you first need to know the conditions under which it runs.
+ To understand how a tool runs, you first need to know the conditions under which it runs.

1. The tool needs an introduction (also called a description). This description tells the LLM what the tool does, and the LLM decides, based on the semantics of the context, whether the tool should be called.
2. The tool's parameters. Some tools may require specific parameters when they are called. Each parameter carries two key values: `参数介绍` (the parameter description) and `是否必须` (whether it is required).
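For reference, the two requirements above map directly onto the tool/function schema that function-calling LLM APIs consume. A minimal TypeScript sketch follows; the `getWeatherTool` object, its parameter names, and its descriptions are hypothetical examples for illustration only, not part of this commit:

// Hypothetical tool schema (OpenAI-style function calling), shown only to
// illustrate the two pieces of information described above.
const getWeatherTool = {
  type: 'function',
  function: {
    name: 'getWeather',
    // 1. The tool description: tells the LLM what the tool does, so it can
    //    decide from the conversation context whether to call it.
    description: 'Query the current weather for a given city',
    parameters: {
      type: 'object',
      properties: {
        city: {
          // 2a. 参数介绍 / the parameter description
          type: 'string',
          description: 'Name of the city to query'
        }
      },
      // 2b. 是否必须 / whether the parameter is required
      required: ['city']
    }
  }
};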
@ -3,12 +3,17 @@ import { ErrType } from '../errorCode';

/* dataset: 507000 */
const startCode = 507000;
export enum CommonErrEnum {
-  fileNotFound = 'fileNotFound'
+  fileNotFound = 'fileNotFound',
  unAuthFile = 'unAuthFile'
}
const datasetErr = [
  {
    statusText: CommonErrEnum.fileNotFound,
    message: 'error.fileNotFound'
  },
  {
    statusText: CommonErrEnum.unAuthFile,
    message: 'error.unAuthFile'
  }
];
export default datasetErr.reduce((acc, cur, index) => {
@ -40,9 +40,9 @@ export const splitText2Chunks = (props: {
    { reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },

    { reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
-   { reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // (?![\*\-|>`0-9]): markdown special char
+   { reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // Enlarge the chunk so it is, as far as possible, a complete paragraph. (?![\*\-|>`0-9]): markdown special char
    { reg: /([\n])/g, maxLen: chunkLen * 1.2 },

    // ------ There's no overlap on the top
    { reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.2 },
    { reg: /([!]|!\s)/g, maxLen: chunkLen * 1.2 },
    { reg: /([?]|\?\s)/g, maxLen: chunkLen * 1.4 },

@ -56,7 +56,7 @@ export const splitText2Chunks = (props: {
  const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen;
  const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;

- // if use markdown title split, Separate record title title
+ // if use markdown title split, Separate record title
  const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
    if (step >= stepReges.length) {
      return [

@ -97,6 +97,7 @@ export const splitText2Chunks = (props: {
      .filter((item) => item.text.trim());
  };

  /* Gets the overlap at the end of a text as the beginning of the next block */
  const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => {
    const forbidOverlap = checkForbidOverlap(step);
    const maxOverlapLen = chunkLen * 0.4;
@ -55,6 +55,7 @@ export type FastGPTFeConfigsType = {
  customApiDomain?: string;
  customSharePageDomain?: string;

  uploadFileMaxAmount?: number;
  uploadFileMaxSize?: number;
};
@ -44,14 +44,18 @@ export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams

export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
  link: string;
};
export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
  fileId: string;
};
export type FileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
  name: string;
  rawTextLength: number;
  hashRawText: string;

  fileMetadata?: Record<string, any>;
  collectionMetadata?: Record<string, any>;
};
export type CsvTableCreateDatasetCollectionParams = {
  datasetId: string;
  parentId?: string;
  fileId: string;
};

/* ================= data ===================== */
export type PgSearchRawType = {
@ -73,6 +73,13 @@ export const DatasetCollectionSyncResultMap = {

/* ------------ data -------------- */

/* ------------ training -------------- */
export enum ImportDataSourceEnum {
  fileLocal = 'fileLocal',
  fileLink = 'fileLink',
  fileCustom = 'fileCustom',
  csvTable = 'csvTable'
}

export enum TrainingModeEnum {
  chunk = 'chunk',
  auto = 'auto',
@ -2,18 +2,18 @@
  "name": "@fastgpt/global",
  "version": "1.0.0",
  "dependencies": {
    "@apidevtools/swagger-parser": "^10.1.0",
    "axios": "^1.5.1",
    "dayjs": "^1.11.7",
    "encoding": "^0.1.13",
    "js-tiktoken": "^1.0.7",
    "openapi-types": "^12.1.3",
    "openai": "4.28.0",
    "nanoid": "^4.0.1",
    "js-yaml": "^4.1.0",
    "timezones-list": "^3.0.2",
    "next": "13.5.2",
    "jschardet": "3.1.1",
    "@apidevtools/swagger-parser": "^10.1.0"
    "nanoid": "^4.0.1",
    "next": "13.5.2",
    "openai": "4.28.0",
    "openapi-types": "^12.1.3",
    "timezones-list": "^3.0.2"
  },
  "devDependencies": {
    "@types/js-yaml": "^4.0.9",
@ -0,0 +1,33 @@
import { connectionMongo, type Model } from '../../mongo';
const { Schema, model, models } = connectionMongo;
import { RawTextBufferSchemaType } from './type';

export const collectionName = 'buffer.rawText';

const RawTextBufferSchema = new Schema({
  sourceId: {
    type: String,
    required: true
  },
  rawText: {
    type: String,
    default: ''
  },
  createTime: {
    type: Date,
    default: () => new Date()
  },
  metadata: Object
});

try {
  RawTextBufferSchema.index({ sourceId: 1 });
  // 20 minutes
  RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
} catch (error) {
  console.log(error);
}

export const MongoRwaTextBuffer: Model<RawTextBufferSchemaType> =
  models[collectionName] || model(collectionName, RawTextBufferSchema);
MongoRwaTextBuffer.syncIndexes();
@ -0,0 +1,8 @@
export type RawTextBufferSchemaType = {
  sourceId: string;
  rawText: string;
  createTime: Date;
  metadata?: {
    filename: string;
  };
};
@ -2,7 +2,7 @@ import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { TTSBufferSchemaType } from './type.d';

- export const collectionName = 'ttsbuffers';
+ export const collectionName = 'buffer.tts';

const TTSBufferSchema = new Schema({
  bufferId: {
@ -4,6 +4,18 @@ import fsp from 'fs/promises';
import fs from 'fs';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readFileRawText } from '../read/rawText';
import { ReadFileByBufferParams } from '../read/type';
import { readMarkdown } from '../read/markdown';
import { readHtmlRawText } from '../read/html';
import { readPdfFile } from '../read/pdf';
import { readWordFile } from '../read/word';
import { readCsvRawText } from '../read/csv';
import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
import { readPptxRawText } from '../read/pptx';
import { readXlsxRawText } from '../read/xlsx';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
  MongoFileSchema;

@ -111,3 +123,139 @@ export async function getDownloadStream({

  return bucket.openDownloadStream(new Types.ObjectId(fileId));
}

export const readFileEncode = async ({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) => {
  const encodeStream = await getDownloadStream({ bucketName, fileId });
  let buffers: Buffer = Buffer.from([]);
  for await (const chunk of encodeStream) {
    buffers = Buffer.concat([buffers, chunk]);
    if (buffers.length > 10) {
      encodeStream.abort();
      break;
    }
  }

  const encoding = detectFileEncoding(buffers);

  return encoding as BufferEncoding;
};

export const readFileContent = async ({
  teamId,
  bucketName,
  fileId,
  csvFormat = false
}: {
  teamId: string;
  bucketName: `${BucketNameEnum}`;
  fileId: string;
  csvFormat?: boolean;
}): Promise<{
  rawText: string;
  filename: string;
}> => {
  // read buffer
  const fileBuffer = await MongoRwaTextBuffer.findOne({ sourceId: fileId }).lean();
  if (fileBuffer) {
    return {
      rawText: fileBuffer.rawText,
      filename: fileBuffer.metadata?.filename || ''
    };
  }

  const [file, encoding, fileStream] = await Promise.all([
    getFileById({ bucketName, fileId }),
    readFileEncode({ bucketName, fileId }),
    getDownloadStream({ bucketName, fileId })
  ]);

  if (!file) {
    return Promise.reject(CommonErrEnum.fileNotFound);
  }

  const extension = file?.filename?.split('.')?.pop()?.toLowerCase() || '';

  const fileBuffers = await (() => {
    return new Promise<Buffer>((resolve, reject) => {
      let buffers = Buffer.from([]);
      fileStream.on('data', (chunk) => {
        buffers = Buffer.concat([buffers, chunk]);
      });
      fileStream.on('end', () => {
        resolve(buffers);
      });
      fileStream.on('error', (err) => {
        reject(err);
      });
    });
  })();

  const params: ReadFileByBufferParams = {
    teamId,
    buffer: fileBuffers,
    encoding,
    metadata: {
      relatedId: fileId
    }
  };

  const { rawText } = await (async () => {
    switch (extension) {
      case 'txt':
        return readFileRawText(params);
      case 'md':
        return readMarkdown(params);
      case 'html':
        return readHtmlRawText(params);
      case 'pdf':
        return readPdfFile(params);
      case 'docx':
        return readWordFile(params);
      case 'pptx':
        return readPptxRawText(params);
      case 'xlsx':
        const xlsxResult = await readXlsxRawText(params);
        if (csvFormat) {
          return {
            rawText: xlsxResult.formatText || ''
          };
        }
        return {
          rawText: xlsxResult.rawText
        };
      case 'csv':
        const csvResult = await readCsvRawText(params);
        if (csvFormat) {
          return {
            rawText: csvResult.formatText || ''
          };
        }
        return {
          rawText: csvResult.rawText
        };
      default:
        return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
    }
  })();

  if (rawText.trim()) {
    await MongoRwaTextBuffer.create({
      sourceId: fileId,
      rawText,
      metadata: {
        filename: file.filename
      }
    });
  }

  return {
    rawText,
    filename: file.filename
  };
};
@ -14,7 +14,6 @@ export async function uploadMongoImg({
  teamId,
  expiredTime,
  metadata,

  shareId
}: UploadImgProps & {
  teamId: string;

@ -30,9 +29,8 @@ export async function uploadMongoImg({
    type,
    teamId,
    binary,
-   expiredTime: expiredTime,
+   expiredTime,
    metadata,

    shareId
  });
@ -25,13 +25,13 @@ const ImageSchema = new Schema({
    enum: Object.keys(mongoImageTypeMap),
    required: true
  },

  metadata: {
    type: Object
  }
});

try {
  // tts expired
  ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
  ImageSchema.index({ type: 1 });
  ImageSchema.index({ createTime: 1 });
@ -0,0 +1,21 @@
import Papa from 'papaparse';
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { readFileRawText } from './rawText';

// Load the raw content of the source file
export const readCsvRawText = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  const { rawText } = readFileRawText(params);

  const csvArr = Papa.parse(rawText).data as string[][];

  const header = csvArr[0];

  const formatText = header
    ? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
    : '';

  return {
    rawText,
    formatText
  };
};
@ -0,0 +1,23 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { initMarkdownText } from './utils';
import { htmlToMarkdown } from '../../string/markdown';
import { readFileRawText } from './rawText';

export const readHtmlRawText = async (
  params: ReadFileByBufferParams
): Promise<ReadFileResponse> => {
  const { teamId, metadata } = params;
  const { rawText: html } = readFileRawText(params);

  const md = await htmlToMarkdown(html);

  const rawText = await initMarkdownText({
    teamId,
    md,
    metadata
  });

  return {
    rawText
  };
};
@ -0,0 +1,18 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { initMarkdownText } from './utils';
import { readFileRawText } from './rawText';

export const readMarkdown = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  const { teamId, metadata } = params;
  const { rawText: md } = readFileRawText(params);

  const rawText = await initMarkdownText({
    teamId,
    md,
    metadata
  });

  return {
    rawText
  };
};
@ -0,0 +1,119 @@
import { getNanoid } from '@fastgpt/global/common/string/tools';
import fs from 'fs';
import decompress from 'decompress';
import { DOMParser } from '@xmldom/xmldom';
import { clearDirFiles } from '../utils';
import { addLog } from '../../system/log';

const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';

function getNewFileName(ext: string) {
  return `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}.${ext}`;
}

const parseString = (xml: string) => {
  let parser = new DOMParser();
  return parser.parseFromString(xml, 'text/xml');
};

const parsePowerPoint = async ({
  filepath,
  decompressPath,
  encoding
}: {
  filepath: string;
  decompressPath: string;
  encoding: BufferEncoding;
}) => {
  // Files regex that hold our content of interest
  const allFilesRegex = /ppt\/(notesSlides|slides)\/(notesSlide|slide)\d+.xml/g;
  const slidesRegex = /ppt\/slides\/slide\d+.xml/g;

  /** The decompress location which contains the filename in it */

  const files = await decompress(filepath, decompressPath, {
    filter: (x) => !!x.path.match(allFilesRegex)
  });

  // Verify that at least the slide xml files exist in the extracted files list.
  if (
    files.length == 0 ||
    !files.map((file) => file.path).some((filename) => filename.match(slidesRegex))
  ) {
    return Promise.reject('解析 PPT 失败');
  }

  // Returning an array of all the xml contents read using fs.readFileSync
  const xmlContentArray = files.map((file) =>
    fs.readFileSync(`${decompressPath}/${file.path}`, encoding)
  );

  let responseArr: string[] = [];

  xmlContentArray.forEach((xmlContent) => {
    /** Find text nodes with a:p tags */
    const xmlParagraphNodesList = parseString(xmlContent).getElementsByTagName('a:p');

    /** Store all the text content to respond */
    responseArr.push(
      Array.from(xmlParagraphNodesList)
        // Filter out paragraph nodes that do not have any text nodes, which are identifiable by a:t tags
        .filter((paragraphNode) => paragraphNode.getElementsByTagName('a:t').length != 0)
        .map((paragraphNode) => {
          /** Find text nodes with a:t tags */
          const xmlTextNodeList = paragraphNode.getElementsByTagName('a:t');
          return Array.from(xmlTextNodeList)
            .filter((textNode) => textNode.childNodes[0] && textNode.childNodes[0].nodeValue)
            .map((textNode) => textNode.childNodes[0].nodeValue)
            .join('');
        })
        .join('\n')
    );
  });

  return responseArr.join('\n');
};

export const parseOffice = async ({
  buffer,
  encoding,
  extension
}: {
  buffer: Buffer;
  encoding: BufferEncoding;
  extension: string;
}) => {
  // Prepare file for processing
  // create temp file subdirectory if it does not exist
  if (!fs.existsSync(DEFAULTDECOMPRESSSUBLOCATION)) {
    fs.mkdirSync(DEFAULTDECOMPRESSSUBLOCATION, { recursive: true });
  }

  // temp file name
  const filepath = getNewFileName(extension);
  const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}`;
  // const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/test`;

  // write new file
  fs.writeFileSync(filepath, buffer, {
    encoding
  });

  const text = await (async () => {
    try {
      switch (extension) {
        case 'pptx':
          return parsePowerPoint({ filepath, decompressPath, encoding });
        default:
          return Promise.reject('只能读取 .pptx 文件');
      }
    } catch (error) {
      addLog.error(`Load ppt error`, { error });
    }
    return '';
  })();

  fs.unlinkSync(filepath);
  clearDirFiles(decompressPath);
  return text;
};
@ -1,5 +1,7 @@
/* read file to txt */
- import * as pdfjsLib from 'pdfjs-dist';
+ import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
  // @ts-ignore
  import('pdfjs-dist/legacy/build/pdf.worker.min.mjs');
  import { ReadFileByBufferParams, ReadFileResponse } from './type';

type TokenType = {
  str: string;

@ -11,9 +13,9 @@ type TokenType = {
  hasEOL: boolean;
};

- export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
-   pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';
+ export const readPdfFile = async ({
+   buffer
+ }: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  const readPDFPage = async (doc: any, pageNo: number) => {
    const page = await doc.getPage(pageNo);
    const tokenizedText = await page.getTextContent();

@ -51,14 +53,19 @@ export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
      .join('');
  };

-   const doc = await pdfjsLib.getDocument(pdf).promise;
+   const loadingTask = pdfjs.getDocument(buffer.buffer);
+   const doc = await loadingTask.promise;

  const pageTextPromises = [];
  for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
    pageTextPromises.push(readPDFPage(doc, pageNo));
  }
  const pageTexts = await Promise.all(pageTextPromises);

  loadingTask.destroy();

  return {
-   rawText: pageTexts.join('')
+   rawText: pageTexts.join(''),
    metadata: {}
  };
};
@ -0,0 +1,14 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
// import { parseOfficeAsync } from 'officeparser';
import { parseOffice } from './parseOffice';

export const readPptxRawText = async ({
  buffer,
  encoding
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  const result = await parseOffice({ buffer, encoding, extension: 'pptx' });

  return {
    rawText: result
  };
};
@ -0,0 +1,10 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';

// Load the raw content of the source file
export const readFileRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
  const content = buffer.toString(encoding);

  return {
    rawText: content
  };
};
@ -0,0 +1,12 @@
export type ReadFileByBufferParams = {
  teamId: string;
  buffer: Buffer;
  encoding: BufferEncoding;
  metadata?: Record<string, any>;
};

export type ReadFileResponse = {
  rawText: string;
  formatText?: string;
  metadata?: Record<string, any>;
};
@ -0,0 +1,25 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';

export const initMarkdownText = ({
  teamId,
  md,
  metadata
}: {
  md: string;
  teamId: string;
  metadata?: Record<string, any>;
}) =>
  markdownProcess({
    rawText: md,
    uploadImgController: (base64Img) =>
      uploadMongoImg({
        type: MongoImageTypeEnum.collectionImage,
        base64Img,
        teamId,
        metadata,
        expiredTime: addHours(new Date(), 2)
      })
  });
@ -0,0 +1,35 @@
import mammoth from 'mammoth';
import { htmlToMarkdown } from '../../string/markdown';
import { ReadFileByBufferParams, ReadFileResponse } from './type';
import { initMarkdownText } from './utils';

/**
 * read docx to markdown
 */
export const readWordFile = async ({
  teamId,
  buffer,
  metadata = {}
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  try {
    const { value: html } = await mammoth.convertToHtml({
      buffer
    });

    const md = await htmlToMarkdown(html);

    const rawText = await initMarkdownText({
      teamId,
      md,
      metadata
    });

    return {
      rawText,
      metadata: {}
    };
  } catch (error) {
    console.log('error doc read:', error);
    return Promise.reject('Can not read doc file, please convert to PDF');
  }
};
@ -0,0 +1,45 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import xlsx from 'node-xlsx';
import Papa from 'papaparse';

export const readXlsxRawText = async ({
  buffer
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  const result = xlsx.parse(buffer, {
    skipHidden: false,
    defval: ''
  });

  const format2Csv = result.map(({ name, data }) => {
    return {
      title: `#${name}`,
      csvText: data.map((item) => item.join(',')).join('\n')
    };
  });

  const rawText = format2Csv.map((item) => item.csvText).join('\n');
  const formatText = format2Csv
    .map((item) => {
      const csvArr = Papa.parse(item.csvText).data as string[][];
      const header = csvArr[0];

      const formatText = header
        ? csvArr
            .map((item) =>
              item
                .map((item, i) => (item ? `${header[i]}:${item}` : ''))
                .filter(Boolean)
                .join('\n')
            )
            .join('\n')
        : '';

      return `${item.title}\n${formatText}`;
    })
    .join('\n');

  return {
    rawText: rawText,
    formatText
  };
};
@ -35,13 +35,8 @@ export const clearDirFiles = (dirPath: string) => {
    return;
  }

- fs.readdirSync(dirPath).forEach((file) => {
-   const curPath = `${dirPath}/${file}`;
-   if (fs.lstatSync(curPath).isDirectory()) {
-     clearDirFiles(curPath);
-   } else {
-     fs.unlinkSync(curPath);
-   }
+ fs.rmdirSync(dirPath, {
+   recursive: true
  });
};
@ -9,7 +9,6 @@ import {
  DatasetCollectionSchemaType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetTraining } from '../training/schema';
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetData } from '../data/schema';
import { delImgByRelatedId } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';
@ -0,0 +1,6 @@
export enum ImportDataSourceEnum {
  fileLocal = 'fileLocal',
  fileLink = 'fileLink',
  fileCustom = 'fileCustom',
  tableLocal = 'tableLocal'
}
@ -1,14 +1,16 @@
|
|||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import { MongoDatasetTraining } from './schema';
|
||||
import type {
|
||||
PushDatasetDataChunkProps,
|
||||
PushDatasetDataProps,
|
||||
PushDatasetDataResponse
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import { getCollectionWithDataset } from '../controller';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { simpleText } from '@fastgpt/global/common/string/tools';
|
||||
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { getLLMModel, getVectorModel } from '../../ai/model';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
import { getCollectionWithDataset } from '../controller';
|
||||
|
||||
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
|
||||
try {
|
||||
|
|
@ -23,31 +25,52 @@ export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> =>
|
|||
} catch (error) {}
|
||||
};
|
||||
|
||||
export async function pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
export const pushDataListToTrainingQueueByCollectionId = async ({
|
||||
collectionId,
|
||||
data,
|
||||
prompt,
|
||||
billId,
|
||||
trainingMode = TrainingModeEnum.chunk
|
||||
...props
|
||||
}: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
|
||||
const vectorModelList = global.vectorModels;
|
||||
const datasetModelList = global.llmModels;
|
||||
|
||||
session?: ClientSession;
|
||||
} & PushDatasetDataProps) => {
|
||||
const {
|
||||
datasetId: { _id: datasetId, vectorModel, agentModel }
|
||||
datasetId: { _id: datasetId, agentModel, vectorModel }
|
||||
} = await getCollectionWithDataset(collectionId);
|
||||
return pushDataListToTrainingQueue({
|
||||
...props,
|
||||
datasetId,
|
||||
collectionId,
|
||||
agentModel,
|
||||
vectorModel
|
||||
});
|
||||
};
|
||||
|
||||
export async function pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId,
|
||||
agentModel,
|
||||
vectorModel,
|
||||
data,
|
||||
prompt,
|
||||
billId,
|
||||
trainingMode = TrainingModeEnum.chunk,
|
||||
session
|
||||
}: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
datasetId: string;
|
||||
agentModel: string;
|
||||
vectorModel: string;
|
||||
session?: ClientSession;
|
||||
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
|
||||
const checkModelValid = async () => {
|
||||
const agentModelData = datasetModelList?.find((item) => item.model === agentModel);
|
||||
const agentModelData = getLLMModel(agentModel);
|
||||
if (!agentModelData) {
|
||||
return Promise.reject(`File model ${agentModel} is inValid`);
|
||||
}
|
||||
const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
|
||||
const vectorModelData = getVectorModel(vectorModel);
|
||||
if (!vectorModelData) {
|
||||
return Promise.reject(`Vector model ${vectorModel} is inValid`);
|
||||
}
|
||||
|
|
@ -124,52 +147,43 @@ export async function pushDataListToTrainingQueue({
|
|||
});
|
||||
|
||||
// insert data to db
|
||||
const insertData = async (dataList: PushDatasetDataChunkProps[], retry = 3): Promise<number> => {
|
||||
try {
|
||||
const results = await MongoDatasetTraining.insertMany(
|
||||
dataList.map((item, i) => ({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId,
|
||||
billId,
|
||||
mode: trainingMode,
|
||||
prompt,
|
||||
model,
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
chunkIndex: item.chunkIndex ?? 0,
|
||||
weight: weight ?? 0,
|
||||
indexes: item.indexes
|
||||
}))
|
||||
);
|
||||
await delay(500);
|
||||
return results.length;
|
||||
} catch (error) {
|
||||
if (retry > 0) {
|
||||
await delay(500);
|
||||
return insertData(dataList, retry - 1);
|
||||
}
|
||||
return Promise.reject(error);
|
||||
}
|
||||
};
|
||||
const insertLen = filterResult.success.length;
|
||||
const failedDocuments: PushDatasetDataChunkProps[] = [];
|
||||
|
||||
let insertLen = 0;
|
||||
const chunkSize = 50;
|
||||
const chunkList = filterResult.success.reduce(
|
||||
(acc, cur) => {
|
||||
const lastChunk = acc[acc.length - 1];
|
||||
if (lastChunk.length < chunkSize) {
|
||||
lastChunk.push(cur);
|
||||
} else {
|
||||
acc.push([cur]);
|
||||
// Insert in bulk with insertMany
|
||||
try {
|
||||
await MongoDatasetTraining.insertMany(
|
||||
filterResult.success.map((item) => ({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId,
|
||||
billId,
|
||||
mode: trainingMode,
|
||||
prompt,
|
||||
model,
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
chunkIndex: item.chunkIndex ?? 0,
|
||||
weight: weight ?? 0,
|
||||
indexes: item.indexes
|
||||
})),
|
||||
{
|
||||
session
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
[[]] as PushDatasetDataChunkProps[][]
|
||||
);
|
||||
for await (const chunks of chunkList) {
|
||||
insertLen += await insertData(chunks);
|
||||
);
|
||||
} catch (error: any) {
|
||||
addLog.error(`Insert error`, error);
|
||||
// If any write errors occur, collect the failed documents into the failure list
|
||||
error.writeErrors.forEach((writeError: any) => {
|
||||
failedDocuments.push(data[writeError.index]);
|
||||
});
|
||||
console.log('failed', failedDocuments);
|
||||
}
|
||||
|
||||
// For the failed documents, retry inserting them one by one
|
||||
for await (const item of failedDocuments) {
|
||||
await MongoDatasetTraining.create(item);
|
||||
}
|
||||
|
||||
delete filterResult.success;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
|
|||
import { addLog } from '../../../common/system/log';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { MongoDatasetTraining } from './schema';
|
||||
import Papa from 'papaparse';
|
||||
|
||||
export const checkInvalidChunkAndLock = async ({
|
||||
err,
|
||||
|
|
@ -39,3 +40,18 @@ export const checkInvalidChunkAndLock = async ({
|
|||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
export const parseCsvTable2Chunks = (rawText: string) => {
|
||||
const csvArr = Papa.parse(rawText).data as string[][];
|
||||
|
||||
const chunks = csvArr
|
||||
.map((item) => ({
|
||||
q: item[0] || '',
|
||||
a: item[1] || ''
|
||||
}))
|
||||
.filter((item) => item.q || item.a);
|
||||
|
||||
return {
|
||||
chunks
|
||||
};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,27 +4,36 @@
  "dependencies": {
    "@fastgpt/global": "workspace:*",
    "@node-rs/jieba": "1.10.0",
    "@xmldom/xmldom": "^0.8.10",
    "axios": "^1.5.1",
    "cheerio": "1.0.0-rc.12",
    "cookie": "^0.5.0",
    "date-fns": "2.30.0",
    "dayjs": "^1.11.7",
    "decompress": "^4.2.1",
    "encoding": "^0.1.13",
    "file-type": "^19.0.0",
    "json5": "^2.2.3",
    "jsonwebtoken": "^9.0.2",
    "mammoth": "^1.6.0",
    "mongoose": "^7.0.2",
    "multer": "1.4.5-lts.1",
    "next": "13.5.2",
    "nextjs-cors": "^2.1.2",
    "node-cron": "^3.0.3",
    "node-xlsx": "^0.23.0",
    "papaparse": "5.4.1",
    "pdfjs-dist": "4.0.269",
    "pg": "^8.10.0",
    "tunnel": "^0.0.6"
  },
  "devDependencies": {
    "@types/cookie": "^0.5.2",
    "@types/decompress": "^4.2.7",
    "@types/jsonwebtoken": "^9.0.3",
    "@types/multer": "^1.4.10",
    "@types/node-cron": "^3.0.11",
    "@types/papaparse": "5.3.7",
    "@types/pg": "^8.6.6",
    "@types/tunnel": "^0.0.4"
  }
@ -0,0 +1,42 @@
import { AuthResponseType } from '@fastgpt/global/support/permission/type';
import { AuthModeType } from '../type';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { parseHeaderCert } from '../controller';
import { getFileById } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';

export async function authFile({
  fileId,
  per = 'owner',
  ...props
}: AuthModeType & {
  fileId: string;
}): Promise<
  AuthResponseType & {
    file: DatasetFileSchema;
  }
> {
  const authRes = await parseHeaderCert(props);
  const { teamId, tmbId } = authRes;

  const file = await getFileById({ bucketName: BucketNameEnum.dataset, fileId });

  if (!file) {
    return Promise.reject(CommonErrEnum.fileNotFound);
  }

  if (file.metadata?.teamId !== teamId) {
    return Promise.reject(CommonErrEnum.unAuthFile);
  }
  if (per === 'owner' && file.metadata?.tmbId !== tmbId) {
    return Promise.reject(CommonErrEnum.unAuthFile);
  }

  return {
    ...authRes,
    isOwner: per === 'owner',
    canWrite: per === 'owner',
    file
  };
}
|
|||
import Papa from 'papaparse';
|
||||
import { readFileRawText } from './rawText';
|
||||
|
||||
/**
|
||||
* read csv to json
|
||||
* @response {
|
||||
* header: string[],
|
||||
* data: string[][]
|
||||
* }
|
||||
*/
|
||||
export const readCsvContent = async ({ file }: { file: File }) => {
|
||||
try {
|
||||
const { rawText: textArr } = await readFileRawText(file);
|
||||
const csvArr = Papa.parse(textArr).data as string[][];
|
||||
if (csvArr.length === 0) {
|
||||
throw new Error('csv 解析失败');
|
||||
}
|
||||
|
||||
const header = csvArr.shift() as string[];
|
||||
|
||||
// add title to data
|
||||
const rawText = csvArr
|
||||
.map((item) =>
|
||||
item.map((value, index) => {
|
||||
if (!header[index]) return value;
|
||||
return `${header[index]}: ${value}`;
|
||||
})
|
||||
)
|
||||
.flat()
|
||||
.join('\n');
|
||||
|
||||
return {
|
||||
rawText,
|
||||
header,
|
||||
data: csvArr.map((item) => item)
|
||||
};
|
||||
} catch (error) {
|
||||
return Promise.reject('解析 csv 文件失败');
|
||||
}
|
||||
};
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
import { htmlStr2Md } from '../../string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
|
||||
export const readHtmlFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const { rawText } = await readFileRawText(file);
|
||||
const md = htmlStr2Md(rawText);
|
||||
|
||||
const simpleMd = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
|
||||
return { rawText: simpleMd };
|
||||
};
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
import { loadFile2Buffer } from '../utils';
|
||||
import { readCsvContent } from './csv';
|
||||
import { readHtmlFile } from './html';
|
||||
import { readMdFile } from './md';
|
||||
import { readPdfFile } from './pdf';
|
||||
import { readFileRawText } from './rawText';
|
||||
import { readWordFile } from './word';
|
||||
|
||||
export const readFileRawContent = async ({
|
||||
file,
|
||||
uploadBase64Controller
|
||||
}: {
|
||||
file: File;
|
||||
uploadBase64Controller?: (base64: string) => Promise<string>;
|
||||
}): Promise<{
|
||||
rawText: string;
|
||||
}> => {
|
||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
return readFileRawText(file);
|
||||
case 'md':
|
||||
return readMdFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
case 'html':
|
||||
return readHtmlFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
case 'csv':
|
||||
return readCsvContent({ file });
|
||||
case 'pdf':
|
||||
const pdf = await loadFile2Buffer({ file });
|
||||
return readPdfFile({ pdf });
|
||||
case 'docx':
|
||||
return readWordFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
|
||||
default:
|
||||
return {
|
||||
rawText: ''
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
|
||||
export const readMdFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const { rawText: md } = await readFileRawText(file);
|
||||
const simpleMd = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
return { rawText: simpleMd };
|
||||
};
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
|
||||
/**
|
||||
* read file raw text
|
||||
*/
|
||||
export const readFileRawText = (file: File) => {
|
||||
return new Promise<{ rawText: string }>((resolve, reject) => {
|
||||
try {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
//@ts-ignore
|
||||
const encode = detectFileEncoding(reader.result);
|
||||
|
||||
// Read the file again, this time using the detected encoding
|
||||
const reader2 = new FileReader();
|
||||
reader2.onload = () => {
|
||||
resolve({
|
||||
rawText: reader2.result as string
|
||||
});
|
||||
};
|
||||
reader2.onerror = (err) => {
|
||||
console.log('Error reading file with detected encoding:', err);
|
||||
reject('Read file error with detected encoding');
|
||||
};
|
||||
reader2.readAsText(file, encode);
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error txt read:', err);
|
||||
reject('Read file error');
|
||||
};
|
||||
reader.readAsBinaryString(file);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { htmlStr2Md } from '../../string/markdown';
|
||||
import { loadFile2Buffer } from '../utils';
|
||||
import mammoth from 'mammoth';
|
||||
|
||||
export const readWordFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const buffer = await loadFile2Buffer({ file });
|
||||
|
||||
const { value: html } = await mammoth.convertToHtml({
|
||||
arrayBuffer: buffer
|
||||
});
|
||||
const md = htmlStr2Md(html);
|
||||
|
||||
const rawText = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController: uploadImgController
|
||||
});
|
||||
|
||||
return {
|
||||
rawText
|
||||
};
|
||||
};
|
||||
|
|
@ -101,6 +101,7 @@ export const iconPaths = {
|
|||
'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'),
|
||||
'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'),
|
||||
'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'),
|
||||
'core/dataset/splitLight': () => import('./icons/core/dataset/splitLight.svg'),
|
||||
'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'),
|
||||
'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'),
|
||||
'core/modules/basicNode': () => import('./icons/core/modules/basicNode.svg'),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,6 @@
|
|||
<svg t="1711938287623" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"
|
||||
p-id="5143">
|
||||
<path
|
||||
d="M153.6 153.6h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 1 1 0-102.4z m0 614.4h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m0-307.2h131.6352a51.2 51.2 0 1 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m292.5568 0h131.6864a51.2 51.2 0 0 1 0 102.4H446.1568a51.2 51.2 0 0 1 0-102.4z m292.608 0H870.4a51.2 51.2 0 0 1 0 102.4h-131.6352a51.2 51.2 0 0 1 0-102.4z"
|
||||
p-id="5144"></path>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 554 B |
|
|
@ -0,0 +1,70 @@
|
|||
import React from 'react';
|
||||
import MyIcon from '../Icon';
|
||||
import {
|
||||
Drawer,
|
||||
DrawerBody,
|
||||
DrawerHeader,
|
||||
DrawerOverlay,
|
||||
DrawerContent,
|
||||
DrawerCloseButton,
|
||||
DrawerContentProps,
|
||||
Flex,
|
||||
Image
|
||||
} from '@chakra-ui/react';
|
||||
import { useLoading } from '../../../hooks/useLoading';
|
||||
|
||||
type Props = DrawerContentProps & {
|
||||
onClose: () => void;
|
||||
iconSrc?: string;
|
||||
title?: any;
|
||||
isLoading?: boolean;
|
||||
};
|
||||
|
||||
const MyRightDrawer = ({
|
||||
onClose,
|
||||
iconSrc,
|
||||
title,
|
||||
maxW = ['90vw', '30vw'],
|
||||
children,
|
||||
isLoading,
|
||||
...props
|
||||
}: Props) => {
|
||||
const { Loading } = useLoading();
|
||||
return (
|
||||
<Drawer isOpen placement="right" onClose={onClose}>
|
||||
<DrawerOverlay />
|
||||
<DrawerContent
|
||||
maxW={maxW}
|
||||
{...props}
|
||||
h={'94%'}
|
||||
mt={'2%'}
|
||||
borderLeftRadius={'lg'}
|
||||
overflow={'hidden'}
|
||||
>
|
||||
<DrawerCloseButton />
|
||||
<DrawerHeader>
|
||||
<Flex alignItems={'center'} pr={2}>
|
||||
{iconSrc && (
|
||||
<>
|
||||
{iconSrc.startsWith('/') ? (
|
||||
<Image mr={3} objectFit={'contain'} alt="" src={iconSrc} w={'20px'} />
|
||||
) : (
|
||||
<MyIcon mr={3} name={iconSrc as any} w={'20px'} />
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{title}
|
||||
</Flex>
|
||||
<DrawerCloseButton zIndex={1} />
|
||||
</DrawerHeader>
|
||||
|
||||
<DrawerBody>
|
||||
{children}
|
||||
<Loading loading={isLoading} fixed={false} />
|
||||
</DrawerBody>
|
||||
</DrawerContent>
|
||||
</Drawer>
|
||||
);
|
||||
};
|
||||
|
||||
export default MyRightDrawer;
|
||||
|
|
@ -2,6 +2,8 @@ import React from 'react';
|
|||
import { Box, Flex, useTheme, Grid, type GridProps } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyTooltip from '../MyTooltip';
|
||||
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||
import QuestionTip from '../MyTooltip/QuestionTip';
|
||||
|
||||
// @ts-ignore
|
||||
interface Props extends GridProps {
|
||||
|
|
@ -36,58 +38,59 @@ const LeftRadio = ({
|
|||
return (
|
||||
<Grid gridGap={[3, 5]} fontSize={['sm', 'md']} {...props}>
|
||||
{list.map((item) => (
|
||||
<MyTooltip key={item.value} label={item.tooltip}>
|
||||
<Flex
|
||||
alignItems={item.desc ? align : 'center'}
|
||||
cursor={'pointer'}
|
||||
userSelect={'none'}
|
||||
px={px}
|
||||
py={py}
|
||||
border={theme.borders.sm}
|
||||
borderWidth={'1px'}
|
||||
borderRadius={'md'}
|
||||
position={'relative'}
|
||||
{...(value === item.value
|
||||
? {
|
||||
borderColor: 'primary.400',
|
||||
bg: activeBg,
|
||||
boxShadow: 'focus'
|
||||
<Flex
|
||||
alignItems={item.desc ? align : 'center'}
|
||||
key={item.value}
|
||||
cursor={'pointer'}
|
||||
userSelect={'none'}
|
||||
px={px}
|
||||
py={py}
|
||||
border={theme.borders.sm}
|
||||
borderWidth={'1px'}
|
||||
borderRadius={'md'}
|
||||
position={'relative'}
|
||||
{...(value === item.value
|
||||
? {
|
||||
borderColor: 'primary.400',
|
||||
bg: activeBg,
|
||||
boxShadow: 'focus'
|
||||
}
|
||||
: {
|
||||
bg: defaultBg,
|
||||
_hover: {
|
||||
borderColor: 'primary.300'
|
||||
}
|
||||
: {
|
||||
bg: defaultBg,
|
||||
_hover: {
|
||||
borderColor: 'primary.300'
|
||||
}
|
||||
})}
|
||||
onClick={() => onChange(item.value)}
|
||||
})}
|
||||
onClick={() => onChange(item.value)}
|
||||
>
|
||||
<Box
|
||||
w={'18px'}
|
||||
h={'18px'}
|
||||
borderWidth={'2.4px'}
|
||||
borderColor={value === item.value ? 'primary.015' : 'transparent'}
|
||||
borderRadius={'50%'}
|
||||
mr={3}
|
||||
>
|
||||
<Box
|
||||
w={'18px'}
|
||||
h={'18px'}
|
||||
borderWidth={'2.4px'}
|
||||
borderColor={value === item.value ? 'primary.015' : 'transparent'}
|
||||
<Flex
|
||||
w={'100%'}
|
||||
h={'100%'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
|
||||
bg={value === item.value ? 'primary.1' : 'transparent'}
|
||||
borderRadius={'50%'}
|
||||
mr={3}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
>
|
||||
<Flex
|
||||
w={'100%'}
|
||||
h={'100%'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
|
||||
bg={value === item.value ? 'primary.1' : 'transparent'}
|
||||
<Box
|
||||
w={'5px'}
|
||||
h={'5px'}
|
||||
borderRadius={'50%'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
>
|
||||
<Box
|
||||
w={'5px'}
|
||||
h={'5px'}
|
||||
borderRadius={'50%'}
|
||||
bg={value === item.value ? 'primary.600' : 'transparent'}
|
||||
></Box>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'1 0 0'}>
|
||||
bg={value === item.value ? 'primary.600' : 'transparent'}
|
||||
></Box>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Box flex={'1 0 0'}>
|
||||
<Flex alignItems={'center'}>
|
||||
<Box
|
||||
color={'myGray.900'}
|
||||
fontWeight={item.desc ? '500' : 'normal'}
|
||||
|
|
@ -95,15 +98,16 @@ const LeftRadio = ({
|
|||
>
|
||||
{typeof item.title === 'string' ? t(item.title) : item.title}
|
||||
</Box>
|
||||
{!!item.desc && (
|
||||
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
|
||||
{t(item.desc)}
|
||||
</Box>
|
||||
)}
|
||||
{item?.children}
|
||||
</Box>
|
||||
</Flex>
|
||||
</MyTooltip>
|
||||
{!!item.tooltip && <QuestionTip label={item.tooltip} ml={1} color={'myGray.600'} />}
|
||||
</Flex>
|
||||
{!!item.desc && (
|
||||
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
|
||||
{t(item.desc)}
|
||||
</Box>
|
||||
)}
|
||||
{item?.children}
|
||||
</Box>
|
||||
</Flex>
|
||||
))}
|
||||
</Grid>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -12,31 +12,31 @@
|
|||
"@emotion/styled": "^11.11.0",
|
||||
"@fastgpt/global": "workspace:*",
|
||||
"@fingerprintjs/fingerprintjs": "^4.2.1",
|
||||
"@lexical/react": "0.12.6",
|
||||
"@lexical/text": "0.12.6",
|
||||
"@lexical/utils": "0.12.6",
|
||||
"@monaco-editor/react": "^4.6.0",
|
||||
"mammoth": "^1.6.0",
|
||||
"@tanstack/react-query": "^4.24.10",
|
||||
"date-fns": "2.30.0",
|
||||
"dayjs": "^1.11.7",
|
||||
"i18next": "23.10.0",
|
||||
"joplin-turndown-plugin-gfm": "^1.0.12",
|
||||
"lexical": "0.12.6",
|
||||
"lodash": "^4.17.21",
|
||||
"mammoth": "^1.6.0",
|
||||
"next-i18next": "15.2.0",
|
||||
"papaparse": "^5.4.1",
|
||||
"pdfjs-dist": "4.0.269",
|
||||
"react": "18.2.0",
|
||||
"react-day-picker": "^8.7.1",
|
||||
"react-dom": "18.2.0",
|
||||
"react-i18next": "13.5.0",
|
||||
"turndown": "^7.1.2",
|
||||
"lexical": "0.12.6",
|
||||
"@lexical/react": "0.12.6",
|
||||
"papaparse": "^5.4.1",
|
||||
"@lexical/utils": "0.12.6",
|
||||
"@lexical/text": "0.12.6",
|
||||
"date-fns": "2.30.0",
|
||||
"react-day-picker": "^8.7.1",
|
||||
"lodash": "^4.17.21",
|
||||
"@tanstack/react-query": "^4.24.10",
|
||||
"dayjs": "^1.11.7"
|
||||
"turndown": "^7.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/react": "18.2.0",
|
||||
"@types/papaparse": "^5.3.7",
|
||||
"@types/react": "18.2.0",
|
||||
"@types/react-dom": "18.2.0",
|
||||
"@types/turndown": "^5.0.4"
|
||||
}
|
||||
|
|
|
|||
334
pnpm-lock.yaml
334
pnpm-lock.yaml
|
|
@ -99,6 +99,9 @@ importers:
|
|||
'@node-rs/jieba':
|
||||
specifier: 1.10.0
|
||||
version: 1.10.0
|
||||
'@xmldom/xmldom':
|
||||
specifier: ^0.8.10
|
||||
version: 0.8.10
|
||||
axios:
|
||||
specifier: ^1.5.1
|
||||
version: 1.6.8
|
||||
|
|
@ -114,15 +117,24 @@ importers:
|
|||
dayjs:
|
||||
specifier: ^1.11.7
|
||||
version: 1.11.10
|
||||
decompress:
|
||||
specifier: ^4.2.1
|
||||
version: 4.2.1
|
||||
encoding:
|
||||
specifier: ^0.1.13
|
||||
version: 0.1.13
|
||||
file-type:
|
||||
specifier: ^19.0.0
|
||||
version: 19.0.0
|
||||
json5:
|
||||
specifier: ^2.2.3
|
||||
version: 2.2.3
|
||||
jsonwebtoken:
|
||||
specifier: ^9.0.2
|
||||
version: 9.0.2
|
||||
mammoth:
|
||||
specifier: ^1.6.0
|
||||
version: 1.7.0
|
||||
mongoose:
|
||||
specifier: ^7.0.2
|
||||
version: 7.6.10
|
||||
|
|
@ -138,6 +150,15 @@ importers:
|
|||
node-cron:
|
||||
specifier: ^3.0.3
|
||||
version: 3.0.3
|
||||
node-xlsx:
|
||||
specifier: ^0.23.0
|
||||
version: 0.23.0
|
||||
papaparse:
|
||||
specifier: 5.4.1
|
||||
version: 5.4.1
|
||||
pdfjs-dist:
|
||||
specifier: 4.0.269
|
||||
version: 4.0.269(encoding@0.1.13)
|
||||
pg:
|
||||
specifier: ^8.10.0
|
||||
version: 8.11.3
|
||||
|
|
@ -148,6 +169,9 @@ importers:
|
|||
'@types/cookie':
|
||||
specifier: ^0.5.2
|
||||
version: 0.5.4
|
||||
'@types/decompress':
|
||||
specifier: ^4.2.7
|
||||
version: 4.2.7
|
||||
'@types/jsonwebtoken':
|
||||
specifier: ^9.0.3
|
||||
version: 9.0.6
|
||||
|
|
@ -157,6 +181,9 @@ importers:
|
|||
'@types/node-cron':
|
||||
specifier: ^3.0.11
|
||||
version: 3.0.11
|
||||
'@types/papaparse':
|
||||
specifier: 5.3.7
|
||||
version: 5.3.7
|
||||
'@types/pg':
|
||||
specifier: ^8.6.6
|
||||
version: 8.11.3
|
||||
|
|
@ -240,7 +267,7 @@ importers:
|
|||
version: 5.4.1
|
||||
pdfjs-dist:
|
||||
specifier: 4.0.269
|
||||
version: 4.0.269
|
||||
version: 4.0.269(encoding@0.1.13)
|
||||
react:
|
||||
specifier: 18.2.0
|
||||
version: 18.2.0
|
||||
|
|
@ -3789,10 +3816,9 @@ packages:
|
|||
yjs: 13.6.14
|
||||
dev: false
|
||||
|
||||
/@mapbox/node-pre-gyp@1.0.11:
|
||||
/@mapbox/node-pre-gyp@1.0.11(encoding@0.1.13):
|
||||
resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==}
|
||||
hasBin: true
|
||||
requiresBuild: true
|
||||
dependencies:
|
||||
detect-libc: 2.0.3
|
||||
https-proxy-agent: 5.0.1
|
||||
|
|
@ -4522,6 +4548,10 @@ packages:
|
|||
use-sync-external-store: 1.2.0(react@18.2.0)
|
||||
dev: false
|
||||
|
||||
/@tokenizer/token@0.3.0:
|
||||
resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==}
|
||||
dev: false
|
||||
|
||||
/@trysound/sax@0.2.0:
|
||||
resolution: {integrity: sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==}
|
||||
engines: {node: '>=10.13.0'}
|
||||
|
|
@ -4737,6 +4767,12 @@ packages:
|
|||
'@types/ms': 0.7.34
|
||||
dev: false
|
||||
|
||||
/@types/decompress@4.2.7:
|
||||
resolution: {integrity: sha512-9z+8yjKr5Wn73Pt17/ldnmQToaFHZxK0N1GHysuk/JIPT8RIdQeoInM01wWPgypRcvb6VH1drjuFpQ4zmY437g==}
|
||||
dependencies:
|
||||
'@types/node': 20.11.30
|
||||
dev: true
|
||||
|
||||
/@types/estree@1.0.5:
|
||||
resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==}
|
||||
dev: true
|
||||
|
|
@ -4876,6 +4912,12 @@ packages:
|
|||
'@types/node': 20.11.30
|
||||
dev: true
|
||||
|
||||
/@types/papaparse@5.3.7:
|
||||
resolution: {integrity: sha512-f2HKmlnPdCvS0WI33WtCs5GD7X1cxzzS/aduaxSu3I7TbhWlENjSPs6z5TaB9K0J+BH1jbmqTaM+ja5puis4wg==}
|
||||
dependencies:
|
||||
'@types/node': 20.11.30
|
||||
dev: true
|
||||
|
||||
/@types/parse-json@4.0.2:
|
||||
resolution: {integrity: sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw==}
|
||||
|
||||
|
|
@ -5550,6 +5592,13 @@ packages:
|
|||
resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
/bl@1.2.3:
|
||||
resolution: {integrity: sha512-pvcNpa0UU69UT341rO6AYy4FVAIkUHuZXRIWbq+zHnsVcRzDDjIAhGuuYoi0d//cwIwtt4pkpKycWEfjdV+vww==}
|
||||
dependencies:
|
||||
readable-stream: 2.3.8
|
||||
safe-buffer: 5.2.1
|
||||
dev: false
|
||||
|
||||
/bluebird@3.4.7:
|
||||
resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==}
|
||||
dev: false
|
||||
|
|
@ -5610,10 +5659,29 @@ packages:
|
|||
engines: {node: '>=14.20.1'}
|
||||
dev: false
|
||||
|
||||
/buffer-alloc-unsafe@1.1.0:
|
||||
resolution: {integrity: sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg==}
|
||||
dev: false
|
||||
|
||||
/buffer-alloc@1.2.0:
|
||||
resolution: {integrity: sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow==}
|
||||
dependencies:
|
||||
buffer-alloc-unsafe: 1.1.0
|
||||
buffer-fill: 1.0.0
|
||||
dev: false
|
||||
|
||||
/buffer-crc32@0.2.13:
|
||||
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
|
||||
dev: false
|
||||
|
||||
/buffer-equal-constant-time@1.0.1:
|
||||
resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
|
||||
dev: false
|
||||
|
||||
/buffer-fill@1.0.0:
|
||||
resolution: {integrity: sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ==}
|
||||
dev: false
|
||||
|
||||
/buffer-from@1.1.2:
|
||||
resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
|
||||
dev: false
|
||||
|
|
@ -5623,6 +5691,13 @@ packages:
|
|||
engines: {node: '>=4'}
|
||||
dev: false
|
||||
|
||||
/buffer@5.7.1:
|
||||
resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==}
|
||||
dependencies:
|
||||
base64-js: 1.5.1
|
||||
ieee754: 1.2.1
|
||||
dev: false
|
||||
|
||||
/busboy@1.6.0:
|
||||
resolution: {integrity: sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==}
|
||||
engines: {node: '>=10.16.0'}
|
||||
|
|
@ -5665,12 +5740,12 @@ packages:
|
|||
/caniuse-lite@1.0.30001599:
|
||||
resolution: {integrity: sha512-LRAQHZ4yT1+f9LemSMeqdMpMxZcc4RMWdj4tiFe3G8tNkWK+E58g+/tzotb5cU6TbcVJLr4fySiAW7XmxQvZQA==}
|
||||
|
||||
/canvas@2.11.2:
|
||||
/canvas@2.11.2(encoding@0.1.13):
|
||||
resolution: {integrity: sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==}
|
||||
engines: {node: '>=6'}
|
||||
requiresBuild: true
|
||||
dependencies:
|
||||
'@mapbox/node-pre-gyp': 1.0.11
|
||||
'@mapbox/node-pre-gyp': 1.0.11(encoding@0.1.13)
|
||||
nan: 2.19.0
|
||||
simple-get: 3.1.1
|
||||
transitivePeerDependencies:
|
||||
|
|
@ -5909,6 +5984,10 @@ packages:
|
|||
engines: {node: '>=16'}
|
||||
dev: true
|
||||
|
||||
/commander@2.20.3:
|
||||
resolution: {integrity: sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==}
|
||||
dev: false
|
||||
|
||||
/commander@7.2.0:
|
||||
resolution: {integrity: sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==}
|
||||
engines: {node: '>= 10'}
|
||||
|
|
@ -6469,6 +6548,59 @@ packages:
|
|||
dev: false
|
||||
optional: true
|
||||
|
||||
/decompress-tar@4.1.1:
|
||||
resolution: {integrity: sha512-JdJMaCrGpB5fESVyxwpCx4Jdj2AagLmv3y58Qy4GE6HMVjWz1FeVQk1Ct4Kye7PftcdOo/7U7UKzYBJgqnGeUQ==}
|
||||
engines: {node: '>=4'}
|
||||
dependencies:
|
||||
file-type: 5.2.0
|
||||
is-stream: 1.1.0
|
||||
tar-stream: 1.6.2
|
||||
dev: false
|
||||
|
||||
/decompress-tarbz2@4.1.1:
|
||||
resolution: {integrity: sha512-s88xLzf1r81ICXLAVQVzaN6ZmX4A6U4z2nMbOwobxkLoIIfjVMBg7TeguTUXkKeXni795B6y5rnvDw7rxhAq9A==}
|
||||
engines: {node: '>=4'}
|
||||
dependencies:
|
||||
decompress-tar: 4.1.1
|
||||
file-type: 6.2.0
|
||||
is-stream: 1.1.0
|
||||
seek-bzip: 1.0.6
|
||||
unbzip2-stream: 1.4.3
|
||||
dev: false
|
||||
|
||||
/decompress-targz@4.1.1:
|
||||
resolution: {integrity: sha512-4z81Znfr6chWnRDNfFNqLwPvm4db3WuZkqV+UgXQzSngG3CEKdBkw5jrv3axjjL96glyiiKjsxJG3X6WBZwX3w==}
|
||||
engines: {node: '>=4'}
|
||||
dependencies:
|
||||
decompress-tar: 4.1.1
|
||||
file-type: 5.2.0
|
||||
is-stream: 1.1.0
|
||||
dev: false
|
||||
|
||||
/decompress-unzip@4.0.1:
|
||||
resolution: {integrity: sha512-1fqeluvxgnn86MOh66u8FjbtJpAFv5wgCT9Iw8rcBqQcCo5tO8eiJw7NNTrvt9n4CRBVq7CstiS922oPgyGLrw==}
|
||||
engines: {node: '>=4'}
|
||||
dependencies:
|
||||
file-type: 3.9.0
|
||||
get-stream: 2.3.1
|
||||
pify: 2.3.0
|
||||
yauzl: 2.10.0
|
||||
dev: false
|
||||
|
||||
/decompress@4.2.1:
|
||||
resolution: {integrity: sha512-e48kc2IjU+2Zw8cTb6VZcJQ3lgVbS4uuB1TfCHbiZIP/haNXm+SVyhu+87jts5/3ROpd82GSVCoNs/z8l4ZOaQ==}
|
||||
engines: {node: '>=4'}
|
||||
dependencies:
|
||||
decompress-tar: 4.1.1
|
||||
decompress-tarbz2: 4.1.1
|
||||
decompress-targz: 4.1.1
|
||||
decompress-unzip: 4.0.1
|
||||
graceful-fs: 4.2.11
|
||||
make-dir: 1.3.0
|
||||
pify: 2.3.0
|
||||
strip-dirs: 2.1.0
|
||||
dev: false
|
||||
|
||||
/deep-eql@4.1.3:
|
||||
resolution: {integrity: sha512-WaEtAOpRA1MQ0eohqZjpGD8zdI0Ovsm8mmFhaDN8dvDZzyoUMcYDnf5Y6iu7HTXxf8JDS23qWa4a+hKCDyOPzw==}
|
||||
engines: {node: '>=6'}
|
||||
|
|
@ -6712,6 +6844,12 @@ packages:
|
|||
iconv-lite: 0.6.3
|
||||
dev: false
|
||||
|
||||
/end-of-stream@1.4.4:
|
||||
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
|
||||
dependencies:
|
||||
once: 1.4.0
|
||||
dev: false
|
||||
|
||||
/enhanced-resolve@5.16.0:
|
||||
resolution: {integrity: sha512-O+QWCviPNSSLAD9Ucn8Awv+poAkqn3T1XY5/N7kR7rQO9yfSGWkYZDwpJ+iKF7B8rxaQKWngSqACpgzeapSyoA==}
|
||||
engines: {node: '>=10.13.0'}
|
||||
|
|
@ -7403,6 +7541,12 @@ packages:
|
|||
dependencies:
|
||||
format: 0.2.2
|
||||
|
||||
/fd-slicer@1.1.0:
|
||||
resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==}
|
||||
dependencies:
|
||||
pend: 1.2.0
|
||||
dev: false
|
||||
|
||||
/file-entry-cache@6.0.1:
|
||||
resolution: {integrity: sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==}
|
||||
engines: {node: ^10.12.0 || >=12.0.0}
|
||||
|
|
@ -7410,6 +7554,30 @@ packages:
|
|||
flat-cache: 3.2.0
|
||||
dev: true
|
||||
|
||||
/file-type@19.0.0:
|
||||
resolution: {integrity: sha512-s7cxa7/leUWLiXO78DVVfBVse+milos9FitauDLG1pI7lNaJ2+5lzPnr2N24ym+84HVwJL6hVuGfgVE+ALvU8Q==}
|
||||
engines: {node: '>=18'}
|
||||
dependencies:
|
||||
readable-web-to-node-stream: 3.0.2
|
||||
strtok3: 7.0.0
|
||||
token-types: 5.0.1
|
||||
dev: false
|
||||
|
||||
/file-type@3.9.0:
|
||||
resolution: {integrity: sha512-RLoqTXE8/vPmMuTI88DAzhMYC99I8BWv7zYP4A1puo5HIjEJ5EX48ighy4ZyKMG9EDXxBgW6e++cn7d1xuFghA==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
dev: false
|
||||
|
||||
/file-type@5.2.0:
|
||||
resolution: {integrity: sha512-Iq1nJ6D2+yIO4c8HHg4fyVb8mAJieo1Oloy1mLLaB2PvezNedhBVm+QU7g0qM42aiMbRXTxKKwGD17rjKNJYVQ==}
|
||||
engines: {node: '>=4'}
|
||||
dev: false
|
||||
|
||||
/file-type@6.2.0:
|
||||
resolution: {integrity: sha512-YPcTBDV+2Tm0VqjybVd32MHdlEGAtuxS3VAYsumFokDSMG+ROT5wawGlnHDoz7bfMcMDt9hxuXvXwoKUx2fkOg==}
|
||||
engines: {node: '>=4'}
|
||||
dev: false
|
||||
|
||||
/fill-range@7.0.1:
|
||||
resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==}
|
||||
engines: {node: '>=8'}
|
||||
|
|
@ -7550,6 +7718,10 @@ packages:
|
|||
engines: {node: '>= 0.6'}
|
||||
dev: false
|
||||
|
||||
/fs-constants@1.0.0:
|
||||
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
|
||||
dev: false
|
||||
|
||||
/fs-minipass@2.1.0:
|
||||
resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==}
|
||||
engines: {node: '>= 8'}
|
||||
|
|
@ -7626,6 +7798,14 @@ packages:
|
|||
engines: {node: '>=6'}
|
||||
dev: false
|
||||
|
||||
/get-stream@2.3.1:
|
||||
resolution: {integrity: sha512-AUGhbbemXxrZJRD5cDvKtQxLuYaIbNtDTK8YqupCI393Q2KSTreEsLUN3ZxAWFGiKTzL6nKuzfcIvieflUX9qA==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
dependencies:
|
||||
object-assign: 4.1.1
|
||||
pinkie-promise: 2.0.1
|
||||
dev: false
|
||||
|
||||
/get-stream@6.0.1:
|
||||
resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==}
|
||||
engines: {node: '>=10'}
|
||||
|
|
@ -7978,6 +8158,10 @@ packages:
|
|||
safer-buffer: 2.1.2
|
||||
dev: false
|
||||
|
||||
/ieee754@1.2.1:
|
||||
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
|
||||
dev: false
|
||||
|
||||
/ignore@5.3.1:
|
||||
resolution: {integrity: sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==}
|
||||
engines: {node: '>= 4'}
|
||||
|
|
@ -8178,6 +8362,10 @@ packages:
|
|||
engines: {node: '>= 0.4'}
|
||||
dev: true
|
||||
|
||||
/is-natural-number@4.0.1:
|
||||
resolution: {integrity: sha512-Y4LTamMe0DDQIIAlaer9eKebAlDSV6huy+TWhJVPlzZh2o4tRP5SQWFlLn5N0To4mDD22/qdOq+veo1cSISLgQ==}
|
||||
dev: false
|
||||
|
||||
/is-negative-zero@2.0.3:
|
||||
resolution: {integrity: sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==}
|
||||
engines: {node: '>= 0.4'}
|
||||
|
|
@ -8229,6 +8417,11 @@ packages:
|
|||
call-bind: 1.0.7
|
||||
dev: true
|
||||
|
||||
/is-stream@1.1.0:
|
||||
resolution: {integrity: sha512-uQPm8kcs47jx38atAcWTVxyltQYoPT68y9aWYdV6yWXSyW8mzSat0TL6CiWdZeCdF3KrAvpVtnHbTv4RN+rqdQ==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
dev: false
|
||||
|
||||
/is-stream@3.0.0:
|
||||
resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==}
|
||||
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
|
||||
|
|
@ -8704,6 +8897,13 @@ packages:
|
|||
'@jridgewell/sourcemap-codec': 1.4.15
|
||||
dev: true
|
||||
|
||||
/make-dir@1.3.0:
|
||||
resolution: {integrity: sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ==}
|
||||
engines: {node: '>=4'}
|
||||
dependencies:
|
||||
pify: 3.0.0
|
||||
dev: false
|
||||
|
||||
/make-dir@3.1.0:
|
||||
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
|
||||
engines: {node: '>=8'}
|
||||
|
|
@ -9547,6 +9747,14 @@ packages:
|
|||
/node-releases@2.0.14:
|
||||
resolution: {integrity: sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==}
|
||||
|
||||
/node-xlsx@0.23.0:
|
||||
resolution: {integrity: sha512-r3KaSZSsSrK92rbPXnX/vDdxURmPPik0rjJ3A+Pybzpjyrk4G6WyGfj8JIz5dMMEpCmWVpmO4qoVPBxnpLv/8Q==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
hasBin: true
|
||||
dependencies:
|
||||
xlsx: '@cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz'
|
||||
dev: false
|
||||
|
||||
/non-layered-tidy-tree-layout@2.0.2:
|
||||
resolution: {integrity: sha512-gkXMxRzUH+PB0ax9dUN0yYF0S25BqeAYqhgMaLUFmpXLEk7Fcu8f4emJuOAY0V8kjDICxROIKsTAKsV/v355xw==}
|
||||
dev: false
|
||||
|
|
@ -9875,17 +10083,26 @@ packages:
|
|||
resolution: {integrity: sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==}
|
||||
dev: true
|
||||
|
||||
/pdfjs-dist@4.0.269:
|
||||
/pdfjs-dist@4.0.269(encoding@0.1.13):
|
||||
resolution: {integrity: sha512-jjWO56tcOjnmPqDf8PmXDeZ781AGvpHMYI3HhNtaFKTRXXPaD1ArSrhVe38/XsrIQJ0onISCND/vuXaWJkiDWw==}
|
||||
engines: {node: '>=18'}
|
||||
optionalDependencies:
|
||||
canvas: 2.11.2
|
||||
canvas: 2.11.2(encoding@0.1.13)
|
||||
path2d-polyfill: 2.1.1
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- supports-color
|
||||
dev: false
|
||||
|
||||
/peek-readable@5.0.0:
|
||||
resolution: {integrity: sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==}
|
||||
engines: {node: '>=14.16'}
|
||||
dev: false
|
||||
|
||||
/pend@1.2.0:
|
||||
resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==}
|
||||
dev: false
|
||||
|
||||
/pg-cloudflare@1.1.1:
|
||||
resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==}
|
||||
requiresBuild: true
|
||||
|
|
@ -9979,6 +10196,28 @@ packages:
|
|||
hasBin: true
|
||||
dev: true
|
||||
|
||||
/pify@2.3.0:
|
||||
resolution: {integrity: sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
dev: false
|
||||
|
||||
/pify@3.0.0:
|
||||
resolution: {integrity: sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg==}
|
||||
engines: {node: '>=4'}
|
||||
dev: false
|
||||
|
||||
/pinkie-promise@2.0.1:
|
||||
resolution: {integrity: sha512-0Gni6D4UcLTbv9c57DfxDGdr41XfgUjqWZu492f0cIGr16zDU06BWP/RAEvOuo7CQ0CNjHaLlM59YJJFm3NWlw==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
dependencies:
|
||||
pinkie: 2.0.4
|
||||
dev: false
|
||||
|
||||
/pinkie@2.0.4:
|
||||
resolution: {integrity: sha512-MnUuEycAemtSaeFSjXKW/aroV7akBbY+Sv+RkyqFjgAe73F+MR0TBWKBRDkmfWq/HiFmdavfZ1G7h4SPZXaCSg==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
dev: false
|
||||
|
||||
/pkg-types@1.0.3:
|
||||
resolution: {integrity: sha512-nN7pYi0AQqJnoLPC9eHFQ8AcyaixBUOwvqc5TDnIKCMEE6I0y8P7OKA7fPexsXGCGxQDl/cmrLAp26LhcwxZ4A==}
|
||||
dependencies:
|
||||
|
|
@ -10396,7 +10635,13 @@ packages:
|
|||
string_decoder: 1.3.0
|
||||
util-deprecate: 1.0.2
|
||||
dev: false
|
||||
optional: true
|
||||
|
||||
/readable-web-to-node-stream@3.0.2:
|
||||
resolution: {integrity: sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==}
|
||||
engines: {node: '>=8'}
|
||||
dependencies:
|
||||
readable-stream: 3.6.2
|
||||
dev: false
|
||||
|
||||
/readdirp@3.6.0:
|
||||
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
|
||||
|
|
@ -10715,6 +10960,13 @@ packages:
|
|||
dependencies:
|
||||
loose-envify: 1.4.0
|
||||
|
||||
/seek-bzip@1.0.6:
|
||||
resolution: {integrity: sha512-e1QtP3YL5tWww8uKaOCQ18UxIT2laNBXHjV/S2WYCiK4udiv8lkG89KRIoCjUagnAmCBurjF4zEVX2ByBbnCjQ==}
|
||||
hasBin: true
|
||||
dependencies:
|
||||
commander: 2.20.3
|
||||
dev: false
|
||||
|
||||
/semver@6.3.1:
|
||||
resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==}
|
||||
hasBin: true
|
||||
|
|
@ -11024,7 +11276,6 @@ packages:
|
|||
dependencies:
|
||||
safe-buffer: 5.2.1
|
||||
dev: false
|
||||
optional: true
|
||||
|
||||
/strip-ansi@6.0.1:
|
||||
resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
|
||||
|
|
@ -11044,6 +11295,12 @@ packages:
|
|||
engines: {node: '>=4'}
|
||||
dev: true
|
||||
|
||||
/strip-dirs@2.1.0:
|
||||
resolution: {integrity: sha512-JOCxOeKLm2CAS73y/U4ZeZPTkE+gNVCzKt7Eox84Iej1LT/2pTWYpZKJuxwQpvX1LiZb1xokNR7RLfuBAa7T3g==}
|
||||
dependencies:
|
||||
is-natural-number: 4.0.1
|
||||
dev: false
|
||||
|
||||
/strip-final-newline@3.0.0:
|
||||
resolution: {integrity: sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==}
|
||||
engines: {node: '>=12'}
|
||||
|
|
@ -11060,6 +11317,14 @@ packages:
|
|||
js-tokens: 8.0.3
|
||||
dev: true
|
||||
|
||||
/strtok3@7.0.0:
|
||||
resolution: {integrity: sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==}
|
||||
engines: {node: '>=14.16'}
|
||||
dependencies:
|
||||
'@tokenizer/token': 0.3.0
|
||||
peek-readable: 5.0.0
|
||||
dev: false
|
||||
|
||||
/style-to-object@0.4.4:
|
||||
resolution: {integrity: sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==}
|
||||
dependencies:
|
||||
|
|
@ -11131,6 +11396,19 @@ packages:
|
|||
engines: {node: '>=6'}
|
||||
dev: true
|
||||
|
||||
/tar-stream@1.6.2:
|
||||
resolution: {integrity: sha512-rzS0heiNf8Xn7/mpdSVVSMAWAoy9bfb1WOTYC78Z0UQKeKa/CWS8FOq0lKGNa8DWKAn9gxjCvMLYc5PGXYlK2A==}
|
||||
engines: {node: '>= 0.8.0'}
|
||||
dependencies:
|
||||
bl: 1.2.3
|
||||
buffer-alloc: 1.2.0
|
||||
end-of-stream: 1.4.4
|
||||
fs-constants: 1.0.0
|
||||
readable-stream: 2.3.8
|
||||
to-buffer: 1.1.1
|
||||
xtend: 4.0.2
|
||||
dev: false
|
||||
|
||||
/tar@6.2.0:
|
||||
resolution: {integrity: sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==}
|
||||
engines: {node: '>=10'}
|
||||
|
|
@ -11149,6 +11427,10 @@ packages:
|
|||
resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==}
|
||||
dev: true
|
||||
|
||||
/through@2.3.8:
|
||||
resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
|
||||
dev: false
|
||||
|
||||
/timezones-list@3.0.3:
|
||||
resolution: {integrity: sha512-C+Vdvvj2c1xB6pu81pOX8geo6mrk/QsudFVlTVQET7QQwu8WAIyhDNeCrK5grU7EMzmbKLWqz7uU6dN8fvQvPQ==}
|
||||
dev: false
|
||||
|
|
@ -11171,6 +11453,10 @@ packages:
|
|||
engines: {node: '>=14.0.0'}
|
||||
dev: true
|
||||
|
||||
/to-buffer@1.1.1:
|
||||
resolution: {integrity: sha512-lx9B5iv7msuFYE3dytT+KE5tap+rNYw+K4jVkb9R/asAb+pbBSM17jtunHplhBe6RRJdZx3Pn2Jph24O32mOVg==}
|
||||
dev: false
|
||||
|
||||
/to-fast-properties@2.0.0:
|
||||
resolution: {integrity: sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==}
|
||||
engines: {node: '>=4'}
|
||||
|
|
@ -11190,6 +11476,14 @@ packages:
|
|||
engines: {node: '>=0.6'}
|
||||
dev: false
|
||||
|
||||
/token-types@5.0.1:
|
||||
resolution: {integrity: sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==}
|
||||
engines: {node: '>=14.16'}
|
||||
dependencies:
|
||||
'@tokenizer/token': 0.3.0
|
||||
ieee754: 1.2.1
|
||||
dev: false
|
||||
|
||||
/tr46@0.0.3:
|
||||
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
||||
dev: false
|
||||
|
|
@ -11369,6 +11663,13 @@ packages:
|
|||
which-boxed-primitive: 1.0.2
|
||||
dev: true
|
||||
|
||||
/unbzip2-stream@1.4.3:
|
||||
resolution: {integrity: sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==}
|
||||
dependencies:
|
||||
buffer: 5.7.1
|
||||
through: 2.3.8
|
||||
dev: false
|
||||
|
||||
/underscore@1.13.6:
|
||||
resolution: {integrity: sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==}
|
||||
dev: false
|
||||
|
|
@ -11943,6 +12244,13 @@ packages:
|
|||
engines: {node: '>= 14'}
|
||||
dev: true
|
||||
|
||||
/yauzl@2.10.0:
|
||||
resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==}
|
||||
dependencies:
|
||||
buffer-crc32: 0.2.13
|
||||
fd-slicer: 1.1.0
|
||||
dev: false
|
||||
|
||||
/yjs@13.6.14:
|
||||
resolution: {integrity: sha512-D+7KcUr0j+vBCUSKXXEWfA+bG4UQBviAwP3gYBhkstkgwy5+8diOPMx0iqLIOxNo/HxaREUimZRxqHGAHCL2BQ==}
|
||||
engines: {node: '>=16.0.0', npm: '>=8.0.0'}
|
||||
|
|
@ -12029,3 +12337,11 @@ packages:
|
|||
/zwitch@2.0.4:
|
||||
resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==}
|
||||
dev: false
|
||||
|
||||
'@cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz':
|
||||
resolution: {tarball: https://cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz}
|
||||
name: xlsx
|
||||
version: 0.19.3
|
||||
engines: {node: '>=0.8'}
|
||||
hasBin: true
|
||||
dev: false
|
||||
|
|
|
|||
|
|
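The lockfile changes above pull in node-xlsx (plus decompress and file-type helpers, presumably for the pptx side) for the xlsx/pptx support named in the commit title. As a rough illustration of how the xlsx side could be wired up — the helper name and the "one CSV block per sheet" output format are assumptions, not taken from this diff — node-xlsx's `parse()` turns a workbook buffer into row arrays:

```ts
// Hypothetical sketch only: turn an uploaded .xlsx buffer into CSV-like raw
// text with node-xlsx (the dependency added in the lockfile above).
import xlsx from 'node-xlsx';

export const xlsxBuffer2RawText = (buffer: Buffer): string => {
  // xlsx.parse returns one entry per sheet: { name, data: row[][] }
  const sheets = xlsx.parse(buffer);

  return sheets
    .map(({ name, data }) => {
      const rows = data
        .filter((row) => Array.isArray(row) && row.length > 0)
        .map((row) => row.map((cell) => String(cell ?? '')).join(','))
        .join('\n');
      return `Sheet: ${name}\n${rows}`;
    })
    .join('\n\n');
};
```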
@@ -82,7 +82,7 @@
  "name": "Embedding-2",
  "avatar": "/imgs/model/openai.svg",
  "charsPointsPrice": 0,
  "defaultToken": 700,
  "defaultToken": 512,
  "maxToken": 3000,
  "weight": 100,
  "dbConfig": {},
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@
|
|||
}
|
||||
},
|
||||
"common": {
|
||||
"Action": "Action",
|
||||
"Add": "Add",
|
||||
"Add New": "Add",
|
||||
"All": "All",
|
||||
|
|
@ -79,6 +80,7 @@
|
|||
"Create New": "Create",
|
||||
"Create Success": "Create Success",
|
||||
"Create Time": "Create time",
|
||||
"Creating": "Creating",
|
||||
"Custom Title": "Custom Title",
|
||||
"Delete": "Delete",
|
||||
"Delete Failed": "Delete Failed",
|
||||
|
|
@ -191,6 +193,7 @@
|
|||
"Empty file tip": "The file content is empty. The file may be unreadable or pure image file content.",
|
||||
"File Content": "File Content",
|
||||
"File Name": "File Name",
|
||||
"File Size": "File Size",
|
||||
"File content can not be empty": "File content can not be empty",
|
||||
"Filename Can not Be Empty": "Filename Can not Be Empty",
|
||||
"Read File Error": "Read file error",
|
||||
|
|
@ -198,6 +201,7 @@
|
|||
"Select failed": "Select file failed",
|
||||
"Select file amount limit": "A maximum of {{max}} files can be selected",
|
||||
"Select file amount limit 100": "You can select a maximum of 100 files at a time",
|
||||
"Some file count exceeds limit": "The number of files exceeding {{maxCount}} has been automatically intercepted",
|
||||
"Some file size exceeds limit": "Some files exceed: {{maxSize}}, have been filtered",
|
||||
"Support file type": "Support {{fileType}} files",
|
||||
"Support max count": "A maximum of {{maxCount}} files are supported.",
|
||||
|
|
@ -620,7 +624,7 @@
|
|||
"file": "File",
|
||||
"folder": "Folder",
|
||||
"import": {
|
||||
"Auto mode Estimated Price Tips": "Enhanced processing calls the file processing model: {{price}} integral /1k Tokens",
|
||||
"Auto mode Estimated Price Tips": "Need to call the file processing model, need to consume more Tokens: {{price}} credits /1k Tokens",
|
||||
"Auto process": "Auto",
|
||||
"Auto process desc": "Automatically set segmentation and preprocessing rules",
|
||||
"CSV Import": "CSV QA Import",
|
||||
|
|
@ -642,7 +646,7 @@
|
|||
"Data file progress": "Data upload progress",
|
||||
"Data process params": "Data process params",
|
||||
"Down load csv template": "Down load csv template",
|
||||
"Embedding Estimated Price Tips": "Index billing: {{price}}/1k Tokens",
|
||||
"Embedding Estimated Price Tips": "Use only the index model and consume a small amount of Tokens: {{price}} credits /1k Tokens",
|
||||
"Estimated Price": "Estimated Price: : {{amount}}{{unit}}",
|
||||
"Estimated Price Tips": "QA charges: {{charsPointsPrice}} points/1k Tokens",
|
||||
"Estimated points": "About {{points}} points",
|
||||
|
|
@ -657,15 +661,19 @@
|
|||
"Import Failed": "Import Failed",
|
||||
"Import Success Tip": "The {{num}} group data is imported successfully. Please wait for training.",
|
||||
"Import Tip": "This task cannot be terminated and takes some time to generate indexes. Please confirm the import. If the balance is insufficient, the unfinished task will be suspended and can continue after topping up.",
|
||||
"Import success": "Import successful, please wait for training",
|
||||
"Link name": "Link name",
|
||||
"Link name placeholder": "Only static links are supported\nOne per line, up to 10 links at a time",
|
||||
"Local file": "Local file",
|
||||
"Local file desc": "Upload files in PDF, TXT, DOCX and other formats",
|
||||
"Only Show First 50 Chunk": "Show only part",
|
||||
"Preview chunks": "Chunks",
|
||||
"Preview raw text": "Preview file text (max show 10000 words)",
|
||||
"Predicted chunk": "Predicted chunk",
|
||||
"Predicted chunk amount": "Predicted chunks:{{amount}}",
|
||||
"Predicted total chars": "Predicted chars: {{total}}",
|
||||
"Preview chunks": "Preview chunks",
|
||||
"Preview raw text": "Preview file text (max show 3000 words)",
|
||||
"Process way": "Process way",
|
||||
"QA Estimated Price Tips": "QA billing: {{price}}/1k Tokens (including input and output)",
|
||||
"QA Estimated Price Tips": "Need to call the file processing model, need to consume more Tokens: {{price}} credits /1k Tokens",
|
||||
"QA Import": "QA Split",
|
||||
"QA Import Tip": "According to certain rules, the text is broken into a larger paragraph, and the AI is invoked to generate a question and answer pair for the paragraph.",
|
||||
"Re Preview": "RePreview",
|
||||
|
|
@ -680,8 +688,8 @@
|
|||
"Total tokens": "Tokens",
|
||||
"Training mode": "Training mode",
|
||||
"Upload data": "Upload data",
|
||||
"Upload file progress": "File upload progress",
|
||||
"Upload status": "Upload status",
|
||||
"Upload file progress": "Upload state",
|
||||
"Upload status": "Status",
|
||||
"Upload success": "Upload success",
|
||||
"Web link": "Web link",
|
||||
"Web link desc": "Fetch static web content as a collection"
|
||||
|
|
@ -1348,6 +1356,7 @@
|
|||
"Pay error": "Pay error",
|
||||
"Pay success": "Pay success",
|
||||
"Plan expired time": "Plan expired time",
|
||||
"Plan reset time": "Plan reset time",
|
||||
"Standard Plan Detail": "Standard Plan Detail",
|
||||
"To read plan": "Read plan",
|
||||
"bill": {
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@
|
|||
}
|
||||
},
|
||||
"common": {
|
||||
"Action": "操作",
|
||||
"Add": "添加",
|
||||
"Add New": "新增",
|
||||
"All": "全部",
|
||||
|
|
@ -79,6 +80,7 @@
|
|||
"Create New": "新建",
|
||||
"Create Success": "创建成功",
|
||||
"Create Time": "创建时间",
|
||||
"Creating": "创建中",
|
||||
"Custom Title": "自定义标题",
|
||||
"Delete": "删除",
|
||||
"Delete Failed": "删除失败",
|
||||
|
|
@ -191,6 +193,7 @@
|
|||
"Empty file tip": "文件内容为空,可能该文件无法读取或为纯图片文件内容。",
|
||||
"File Content": "文件内容",
|
||||
"File Name": "文件名",
|
||||
"File Size": "文件大小",
|
||||
"File content can not be empty": "文件内容不能为空",
|
||||
"Filename Can not Be Empty": "文件名不能为空",
|
||||
"Read File Error": "解析文件失败",
|
||||
|
|
@ -198,6 +201,7 @@
|
|||
"Select failed": "选择文件异常",
|
||||
"Select file amount limit": "最多选择 {{max}} 个文件",
|
||||
"Select file amount limit 100": "每次最多选择100个文件",
|
||||
"Some file count exceeds limit": "超出{{maxCount}}个文件,已自动截取",
|
||||
"Some file size exceeds limit": "部分文件超出: {{maxSize}},已被过滤",
|
||||
"Support file type": "支持 {{fileType}} 类型文件",
|
||||
"Support max count": "最多支持 {{maxCount}} 个文件。",
|
||||
|
|
@ -622,7 +626,7 @@
|
|||
"file": "文件",
|
||||
"folder": "目录",
|
||||
"import": {
|
||||
"Auto mode Estimated Price Tips": "增强处理需调用文件处理模型: {{price}}积分/1k Tokens",
|
||||
"Auto mode Estimated Price Tips": "需调用文件处理模型,需要消耗较多Tokens: {{price}}积分/1k Tokens",
|
||||
"Auto process": "自动",
|
||||
"Auto process desc": "自动设置分割和预处理规则",
|
||||
"CSV Import": "CSV 导入",
|
||||
|
|
@ -644,7 +648,7 @@
|
|||
"Data file progress": "数据上传进度",
|
||||
"Data process params": "数据处理参数",
|
||||
"Down load csv template": "点击下载 CSV 模板",
|
||||
"Embedding Estimated Price Tips": "索引计费: {{price}}积分/1k Tokens",
|
||||
"Embedding Estimated Price Tips": "仅使用索引模型,消耗少量Tokens: {{price}}积分/1k Tokens",
|
||||
"Estimated Price": "预估价格: {{amount}}{{unit}}",
|
||||
"Estimated Price Tips": "QA计费为\n输入: {{charsPointsPrice}}积分/1k Tokens",
|
||||
"Estimated points": "预估消耗 {{points}} 积分",
|
||||
|
|
@ -659,15 +663,19 @@
|
|||
"Import Failed": "导入文件失败",
|
||||
"Import Success Tip": "共成功导入 {{num}} 组数据,请耐心等待训练.",
|
||||
"Import Tip": "该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。",
|
||||
"Import success": "导入成功,请等待训练",
|
||||
"Link name": "网络链接",
|
||||
"Link name placeholder": "仅支持静态链接,如果上传后数据为空,可能该链接无法被读取\n每行一个,每次最多 10 个链接",
|
||||
"Local file": "本地文件",
|
||||
"Local file desc": "上传 PDF, TXT, DOCX 等格式的文件",
|
||||
"Only Show First 50 Chunk": "仅展示部分",
|
||||
"Preview chunks": "分段预览",
|
||||
"Preview raw text": "预览源文本(最多展示10000字)",
|
||||
"Predicted chunk": "预估分段",
|
||||
"Predicted chunk amount": "预估分段:{{amount}}",
|
||||
"Predicted total chars": "预估字数: {{total}}",
|
||||
"Preview chunks": "预览分段(最多5段)",
|
||||
"Preview raw text": "预览源文本(最多3000字)",
|
||||
"Process way": "处理方式",
|
||||
"QA Estimated Price Tips": "QA计费为: {{price}}积分/1k Tokens(包含输入和输出)",
|
||||
"QA Estimated Price Tips": "需调用文件处理模型,需要消耗较多Tokens: {{price}}积分/1k Tokens",
|
||||
"QA Import": "QA拆分",
|
||||
"QA Import Tip": "根据一定规则,将文本拆成一段较大的段落,调用 AI 为该段落生成问答对。有非常高的检索精度,但是会丢失很多内容细节。",
|
||||
"Re Preview": "重新生成预览",
|
||||
|
|
@ -683,7 +691,7 @@
|
|||
"Training mode": "训练模式",
|
||||
"Upload data": "上传数据",
|
||||
"Upload file progress": "文件上传进度",
|
||||
"Upload status": "上传状态",
|
||||
"Upload status": "状态",
|
||||
"Upload success": "上传成功",
|
||||
"Web link": "网页链接",
|
||||
"Web link desc": "读取静态网页内容作为数据集"
|
||||
|
|
@ -1350,6 +1358,7 @@
|
|||
"Pay error": "支付失败",
|
||||
"Pay success": "支付成功",
|
||||
"Plan expired time": "套餐到期时间",
|
||||
"Plan reset time": "套餐重置时间",
|
||||
"Standard Plan Detail": "套餐详情",
|
||||
"To read plan": "查看套餐",
|
||||
"bill": {
|
||||
|
|
@ -1407,7 +1416,7 @@
|
|||
"Standard update fail": "修改订阅套餐异常",
|
||||
"Standard update success": "变更订阅套餐成功!",
|
||||
"Sub plan": "订阅套餐",
|
||||
"Sub plan tip": "免费使用 FastGPT 或升级更高的套餐",
|
||||
"Sub plan tip": "免费使用 {{title}} 或升级更高的套餐",
|
||||
"Team plan and usage": "套餐与用量",
|
||||
"Training weight": "训练优先级: {{weight}}",
|
||||
"Update extra ai points": "额外AI积分",
|
||||
|
|
|
|||
|
|
@@ -2,6 +2,7 @@ import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import {
  DatasetSearchModeEnum,
  DatasetTypeEnum,
  ImportDataSourceEnum,
  TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import {
@@ -67,3 +68,24 @@ export type SearchTestResponse = {
  similarity: number;
  usingQueryExtension: boolean;
};

/* =========== training =========== */
export type PostPreviewFilesChunksProps = {
  type: `${ImportDataSourceEnum}`;
  sourceId: string;
  chunkSize: number;
  overlapRatio: number;
  customSplitChar?: string;
};

export type PostPreviewFilesChunksResponse = {
  fileId: string;
  rawTextLength: number;
  chunks: string[];
}[];
export type PostPreviewTableChunksResponse = {
  fileId: string;
  totalChunks: number;
  chunks: { q: string; a: string; chunkIndex: number }[];
  errorText?: string;
}[];
|
||||
|
|
|
|||
|
|
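For reference, a request body that satisfies the new `PostPreviewFilesChunksProps` type above might look like the following; the `sourceId` value is a placeholder, and 512 / 0.2 are just the defaults used elsewhere in this commit, not requirements of the type:

```ts
import type { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

// Illustrative payload; sourceId is a placeholder GridFS file id.
const previewBody: PostPreviewFilesChunksProps = {
  type: ImportDataSourceEnum.fileLocal,
  sourceId: '65f1c0ffee0000000000abcd',
  chunkSize: 512,
  overlapRatio: 0.2
  // customSplitChar is optional and omitted here
};
```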
@@ -1,5 +0,0 @@
/* ================= dataset ===================== */

/* ================= collection ===================== */

/* ================= data ===================== */
|
||||
|
|
@ -397,14 +397,22 @@ const PlanUsage = () => {
|
|||
<Box fontWeight={'bold'} fontSize="xl">
|
||||
{t(planName)}
|
||||
</Box>
|
||||
<Flex mt="2" color={'#485264'} fontSize="sm">
|
||||
<Box>{t('support.wallet.Plan expired time')}:</Box>
|
||||
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
|
||||
</Flex>
|
||||
{isFreeTeam && (
|
||||
<Box mt="2" color={'#485264'} fontSize="sm">
|
||||
免费版用户30天无任何使用记录时,系统会自动清理账号知识库。
|
||||
</Box>
|
||||
|
||||
{isFreeTeam ? (
|
||||
<>
|
||||
<Flex mt="2" color={'#485264'} fontSize="sm">
|
||||
<Box>{t('support.wallet.Plan reset time')}:</Box>
|
||||
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
|
||||
</Flex>
|
||||
<Box mt="2" color={'#485264'} fontSize="sm">
|
||||
免费版用户30天无任何使用记录时,系统会自动清理账号知识库。
|
||||
</Box>
|
||||
</>
|
||||
) : (
|
||||
<Flex mt="2" color={'#485264'} fontSize="sm">
|
||||
<Box>{t('support.wallet.Plan expired time')}:</Box>
|
||||
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
|
||||
</Flex>
|
||||
)}
|
||||
</Box>
|
||||
<Button onClick={() => router.push('/price')}>
|
||||
|
|
|
|||
|
|
@ -2,51 +2,15 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
|||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
|
||||
/*
|
||||
检测无效的数据集图片
|
||||
|
||||
可能异常情况:
|
||||
1. 上传文件过程中,上传了图片,但是最终没有创建数据集。
|
||||
*/
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
|
||||
let deleteImageAmount = 0;
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
const {
|
||||
startHour = 72,
|
||||
endHour = 24,
|
||||
limit = 10
|
||||
} = req.body as { startHour?: number; endHour?: number; limit?: number };
|
||||
await authCert({ req, authRoot: true });
|
||||
await connectToDatabase();
|
||||
|
||||
// start: now - maxDay, end: now - 3 day
|
||||
const start = addHours(new Date(), -startHour);
|
||||
const end = addHours(new Date(), -endHour);
|
||||
deleteImageAmount = 0;
|
||||
|
||||
await checkInvalid(start, end, limit);
|
||||
|
||||
jsonRes(res, {
|
||||
data: deleteImageAmount
|
||||
});
|
||||
} catch (error) {
|
||||
addLog.error(`check Invalid user error`, error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export async function checkInvalid(start: Date, end: Date, limit = 50) {
|
||||
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
||||
const images = await MongoImage.find(
|
||||
{
|
||||
createTime: {
|
||||
|
|
@ -86,3 +50,37 @@ export async function checkInvalid(start: Date, end: Date, limit = 50) {
|
|||
|
||||
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
||||
}
|
||||
|
||||
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
await authCert({ req, authRoot: true });
|
||||
|
||||
(async () => {
|
||||
try {
|
||||
console.log('执行脏数据清理任务');
|
||||
const end = addHours(new Date(), -1);
|
||||
const start = addHours(new Date(), -360 * 24);
|
||||
await checkFiles(start, end);
|
||||
await checkInvalidImg(start, end);
|
||||
await checkInvalidCollection(start, end);
|
||||
await checkInvalidVector(start, end);
|
||||
console.log('执行脏数据清理任务完毕');
|
||||
} catch (error) {
|
||||
console.log('执行脏数据清理任务出错了');
|
||||
}
|
||||
})();
|
||||
|
||||
jsonRes(res, {
|
||||
message: 'success'
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -6,9 +6,52 @@ import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
|
|||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
|
||||
let deleteImageAmount = 0;
|
||||
export async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
||||
const images = await MongoImage.find(
|
||||
{
|
||||
createTime: {
|
||||
$gte: start,
|
||||
$lte: end
|
||||
},
|
||||
'metadata.relatedId': { $exists: true }
|
||||
},
|
||||
'_id teamId metadata'
|
||||
);
|
||||
console.log('total images', images.length);
|
||||
let index = 0;
|
||||
|
||||
for await (const image of images) {
|
||||
try {
|
||||
// 1. 检测是否有对应的集合
|
||||
const collection = await MongoDatasetCollection.findOne(
|
||||
{
|
||||
teamId: image.teamId,
|
||||
'metadata.relatedImgId': image.metadata?.relatedId
|
||||
},
|
||||
'_id'
|
||||
);
|
||||
|
||||
if (!collection) {
|
||||
await image.deleteOne();
|
||||
deleteImageAmount++;
|
||||
}
|
||||
|
||||
index++;
|
||||
|
||||
index % 100 === 0 && console.log(index);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
||||
}
|
||||
|
||||
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
|
|
|
|||
|
|
@ -2,13 +2,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
|||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
|
||||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
|
||||
import { PluginTypeEnum } from '@fastgpt/global/core/plugin/constants';
|
||||
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,41 @@
/*
  Read db file content and response 3000 words
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const { fileId, csvFormat } = req.body as { fileId: string; csvFormat?: boolean };

    if (!fileId) {
      throw new Error('fileId is empty');
    }

    const { teamId } = await authFile({ req, authToken: true, fileId });

    const { rawText } = await readFileContent({
      teamId,
      bucketName: BucketNameEnum.dataset,
      fileId,
      csvFormat
    });

    jsonRes(res, {
      data: {
        previewContent: rawText.slice(0, 3000),
        totalLength: rawText.length
      }
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}
|
||||
|
|
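The new route above accepts `{ fileId, csvFormat }` and returns the first 3000 characters of the parsed file plus its total length. A hedged sketch of calling it from the web client follows; the URL path and response envelope are assumptions based on the handler, since the diff does not show where the file sits under pages/api:

```ts
// Hypothetical client helper; the endpoint path is an assumption.
type PreviewContentResponse = {
  previewContent: string;
  totalLength: number;
};

export const getFilePreviewContent = async (fileId: string, csvFormat?: boolean) => {
  const response = await fetch('/api/common/file/previewContent', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ fileId, csvFormat })
  });
  const { data } = (await response.json()) as { code: number; data: PreviewContentResponse };
  return data; // { previewContent, totalLength }
};
```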
@ -2,9 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
|||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authFileToken } from '@fastgpt/service/support/permission/controller';
|
||||
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import {
|
||||
getDownloadStream,
|
||||
getFileById,
|
||||
readFileEncode
|
||||
} from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
|
|
@ -18,8 +21,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||
throw new Error('fileId is empty');
|
||||
}
|
||||
|
||||
const [file, encodeStream] = await Promise.all([
|
||||
const [file, encoding, fileStream] = await Promise.all([
|
||||
getFileById({ bucketName, fileId }),
|
||||
readFileEncode({ bucketName, fileId }),
|
||||
getDownloadStream({ bucketName, fileId })
|
||||
]);
|
||||
|
||||
|
|
@ -27,24 +31,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||
return Promise.reject(CommonErrEnum.fileNotFound);
|
||||
}
|
||||
|
||||
// get encoding
|
||||
let buffers: Buffer = Buffer.from([]);
|
||||
for await (const chunk of encodeStream) {
|
||||
buffers = Buffer.concat([buffers, chunk]);
|
||||
if (buffers.length > 10) {
|
||||
encodeStream.abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const encoding = detectFileEncoding(buffers);
|
||||
|
||||
res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`);
|
||||
res.setHeader('Cache-Control', 'public, max-age=3600');
|
||||
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
|
||||
|
||||
const fileStream = await getDownloadStream({ bucketName, fileId });
|
||||
|
||||
fileStream.pipe(res);
|
||||
|
||||
fileStream.on('error', () => {
|
||||
|
|
|
|||
|
|
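The hunk above replaces the route's inline charset sniffing with a `readFileEncode` helper from the gridfs controller. Its implementation is not part of this diff; presumably it wraps the same logic that was removed here, roughly:

```ts
// Sketch of what a readFileEncode-style helper presumably does, mirroring the
// inline code removed above; the function name and stream type are assumptions.
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';

export const readStreamEncoding = async (
  stream: AsyncIterable<Buffer> & { abort: () => void }
) => {
  // Only the first few bytes are needed to guess the charset.
  let buffer = Buffer.from([]);
  for await (const chunk of stream) {
    buffer = Buffer.concat([buffer, chunk]);
    if (buffer.length > 10) {
      stream.abort();
      break;
    }
  }
  return detectFileEncoding(buffer);
};
```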
@ -4,24 +4,22 @@ import { connectToDatabase } from '@/service/mongo';
|
|||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
|
||||
/**
|
||||
* Creates the multer uploader
|
||||
*/
|
||||
const upload = getUploadModel({
|
||||
maxSize: 500 * 1024 * 1024
|
||||
});
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
let filePaths: string[] = [];
|
||||
/* Creates the multer uploader */
|
||||
const upload = getUploadModel({
|
||||
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
|
||||
});
|
||||
const filePaths: string[] = [];
|
||||
|
||||
try {
|
||||
const { teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
await connectToDatabase();
|
||||
const { file, bucketName, metadata } = await upload.doUpload(req, res);
|
||||
|
||||
filePaths = [file.path];
|
||||
await connectToDatabase();
|
||||
filePaths.push(file.path);
|
||||
|
||||
const { teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
if (!bucketName) {
|
||||
throw new Error('bucketName is empty');
|
||||
|
|
@ -46,6 +44,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||
error
|
||||
});
|
||||
}
|
||||
|
||||
removeFilesByPaths(filePaths);
|
||||
}
|
||||
|
||||
export const config = {
|
||||
|
|
|
|||
|
|
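With the change above, the upload size limit comes from `global.feConfigs?.uploadFileMaxSize` (in MB) instead of a hard-coded 500 MB, and temp files are always cleaned up via `removeFilesByPaths`. A hedged browser-side counterpart might look like this; the endpoint path, multipart field names and response shape are assumptions inferred from the handler, not shown verbatim in this diff:

```ts
// Hypothetical upload helper; URL, field names and response shape are assumptions.
export const uploadToGridFs = async (file: File, metadata: Record<string, any> = {}) => {
  const formData = new FormData();
  formData.append('file', file, file.name);
  formData.append('bucketName', 'dataset');
  formData.append('metadata', JSON.stringify(metadata));

  const response = await fetch('/api/common/file/upload', {
    method: 'POST',
    body: formData
  });
  // presumably resolves to the new GridFS fileId
  return (await response.json()) as { code: number; data: string };
};
```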
@ -12,12 +12,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||
|
||||
const { teamId } = await authChatCert({ req, authToken: true });
|
||||
|
||||
const data = await uploadMongoImg({
|
||||
const imgId = await uploadMongoImg({
|
||||
teamId,
|
||||
...body
|
||||
});
|
||||
|
||||
jsonRes(res, { data });
|
||||
jsonRes(res, { data: imgId });
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,112 @@
|
|||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
|
||||
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
const { datasetId, parentId, fileId } = req.body as FileIdCreateDatasetCollectionParams;
|
||||
const trainingType = TrainingModeEnum.chunk;
|
||||
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
per: 'w',
|
||||
datasetId: datasetId
|
||||
});
|
||||
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
});
|
||||
// 2. split chunks
|
||||
const { chunks = [] } = parseCsvTable2Chunks(rawText);
|
||||
|
||||
// 3. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 4. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
teamId,
|
||||
tmbId,
|
||||
name: filename,
|
||||
parentId,
|
||||
datasetId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
fileId,
|
||||
|
||||
// special metadata
|
||||
trainingType,
|
||||
chunkSize: 0,
|
||||
|
||||
session
|
||||
});
|
||||
|
||||
// 5. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: filename,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 6. insert to training queue
|
||||
await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
billId,
|
||||
data: chunks.map((chunk, index) => ({
|
||||
q: chunk.q,
|
||||
a: chunk.a,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -1,94 +1,151 @@
|
|||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { delFileByFileIdList, uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
import {
|
||||
delFileByFileIdList,
|
||||
readFileContent
|
||||
} from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
|
||||
/**
|
||||
* Creates the multer uploader
|
||||
*/
|
||||
const upload = getUploadModel({
|
||||
maxSize: 500 * 1024 * 1024
|
||||
});
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
let filePaths: string[] = [];
|
||||
let fileId: string = '';
|
||||
const { datasetId } = req.query as { datasetId: string };
|
||||
const {
|
||||
fileId,
|
||||
trainingType = TrainingModeEnum.chunk,
|
||||
chunkSize = 512,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
...body
|
||||
} = req.body as FileIdCreateDatasetCollectionParams;
|
||||
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { teamId, tmbId } = await authDataset({
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
per: 'w',
|
||||
datasetId
|
||||
datasetId: body.datasetId
|
||||
});
|
||||
|
||||
const { file, bucketName, data } = await upload.doUpload<FileCreateDatasetCollectionParams>(
|
||||
req,
|
||||
res
|
||||
);
|
||||
filePaths = [file.path];
|
||||
|
||||
if (!file || !bucketName) {
|
||||
throw new Error('file is empty');
|
||||
}
|
||||
|
||||
const { fileMetadata, collectionMetadata, ...collectionData } = data;
|
||||
|
||||
// upload file and create collection
|
||||
fileId = await uploadFile({
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContent({
|
||||
teamId,
|
||||
tmbId,
|
||||
bucketName,
|
||||
path: file.path,
|
||||
filename: file.originalname,
|
||||
contentType: file.mimetype,
|
||||
metadata: fileMetadata
|
||||
});
|
||||
|
||||
// create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...collectionData,
|
||||
metadata: collectionMetadata,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: collectionId
|
||||
// 2. split chunks
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: rawText,
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : []
|
||||
});
|
||||
|
||||
// 3. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 4. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
name: filename,
|
||||
fileId,
|
||||
metadata: {
|
||||
relatedImgId: fileId
|
||||
},
|
||||
|
||||
// special metadata
|
||||
trainingType,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(rawText),
|
||||
rawTextLength: rawText.length,
|
||||
session
|
||||
});
|
||||
|
||||
// 5. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: filename,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 6. insert to training queue
|
||||
await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
// 7. remove related image ttl
|
||||
await MongoImage.updateMany(
|
||||
{
|
||||
teamId,
|
||||
'metadata.relatedId': fileId
|
||||
},
|
||||
{
|
||||
// Remove expiredTime to avoid ttl expiration
|
||||
$unset: {
|
||||
expiredTime: 1
|
||||
}
|
||||
},
|
||||
{
|
||||
session
|
||||
}
|
||||
);
|
||||
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (error) {
|
||||
if (fileId) {
|
||||
try {
|
||||
await delFileByFileIdList({
|
||||
fileIdList: [fileId],
|
||||
bucketName: BucketNameEnum.dataset
|
||||
});
|
||||
} catch (error) {}
|
||||
}
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
|
||||
removeFilesByPaths(filePaths);
|
||||
}
|
||||
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: false
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
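Step 7 above unsets `expiredTime` on the related images so they stop matching the new TTL index from this commit ("feat: add image ttl index"). The index itself lives in the MongoImage schema, which this diff does not show; a minimal sketch of such a TTL index in Mongoose — field name taken from the `$unset` above, the surrounding schema fields and expiry offset are guesses:

```ts
// Sketch only: a TTL index that auto-deletes image documents once their
// expiredTime passes. The real schema is not shown in this diff.
import { Schema } from 'mongoose';

const ImageSchema = new Schema({
  teamId: { type: Schema.Types.ObjectId, required: true },
  binary: Buffer,
  metadata: Object,
  expiredTime: Date
});

// Documents whose expiredTime is in the past get removed by MongoDB's TTL
// monitor; images that end up attached to a collection have the field $unset,
// so they never expire.
ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 0 });
```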
@ -19,6 +19,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools';
|
|||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
|
|
@ -55,9 +56,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
// 3. create collection and training bill
|
||||
const [{ _id: collectionId }, { billId }] = await Promise.all([
|
||||
createOneCollection({
|
||||
const createResult = await mongoSessionRun(async (session) => {
|
||||
// 3. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
|
|
@ -70,34 +71,44 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(text),
|
||||
rawTextLength: text.length
|
||||
}),
|
||||
createTrainingUsage({
|
||||
rawTextLength: text.length,
|
||||
session
|
||||
});
|
||||
|
||||
// 4. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: name,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name
|
||||
})
|
||||
]);
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 4. push chunks to training queue
|
||||
const insertResults = await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
collectionId,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
}))
|
||||
// 5. push chunks to training queue
|
||||
const insertResults = await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
return { collectionId, results: insertResults };
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: { collectionId, results: insertResults }
|
||||
data: createResult
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,8 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
|
|||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { collectionId, data } = req.body as PushDatasetDataProps;
|
||||
const body = req.body as PushDatasetDataProps;
|
||||
const { collectionId, data } = body;
|
||||
|
||||
if (!collectionId || !Array.isArray(data)) {
|
||||
throw new Error('collectionId or data is empty');
|
||||
|
|
@ -42,9 +43,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
|||
|
||||
jsonRes<PushDatasetDataResponse>(res, {
|
||||
data: await pushDataListToTrainingQueue({
|
||||
...req.body,
|
||||
...body,
|
||||
teamId,
|
||||
tmbId
|
||||
tmbId,
|
||||
datasetId: collection.datasetId._id,
|
||||
agentModel: collection.datasetId.agentModel,
|
||||
vectorModel: collection.datasetId.vectorModel
|
||||
})
|
||||
});
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { authFile } from '@fastgpt/service/support/permission/auth/file';
|
||||
import { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { type, sourceId, chunkSize, customSplitChar, overlapRatio } =
|
||||
req.body as PostPreviewFilesChunksProps;
|
||||
|
||||
if (!sourceId) {
|
||||
throw new Error('sourceId is empty');
|
||||
}
|
||||
if (chunkSize > 30000) {
|
||||
throw new Error('chunkSize is too large, should be less than 30000');
|
||||
}
|
||||
|
||||
const { chunks } = await (async () => {
|
||||
if (type === ImportDataSourceEnum.fileLocal) {
|
||||
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
|
||||
const fileId = String(file._id);
|
||||
|
||||
const { rawText } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
csvFormat: true
|
||||
});
|
||||
// split chunks (5 chunk)
|
||||
const sliceRawText = 10 * chunkSize;
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: rawText.slice(0, sliceRawText),
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio,
|
||||
customReg: customSplitChar ? [customSplitChar] : []
|
||||
});
|
||||
|
||||
return {
|
||||
chunks: chunks.map((item) => ({
|
||||
q: item,
|
||||
a: ''
|
||||
}))
|
||||
};
|
||||
}
|
||||
if (type === ImportDataSourceEnum.csvTable) {
|
||||
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
|
||||
const fileId = String(file._id);
|
||||
const { rawText } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
csvFormat: false
|
||||
});
|
||||
const { chunks } = parseCsvTable2Chunks(rawText);
|
||||
|
||||
return {
|
||||
chunks: chunks || []
|
||||
};
|
||||
}
|
||||
return { chunks: [] };
|
||||
})();
|
||||
|
||||
jsonRes<{ q: string; a: string }[]>(res, {
|
||||
data: chunks.slice(0, 5)
|
||||
});
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
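The handler above never returns more than five chunks, so the client can preview a source cheaply before committing to a full import. A minimal sketch of a caller, using the getPreviewChunks helper that the preview drawer later in this commit also uses; the concrete argument values are placeholders, not values taken from the diff:

import { getPreviewChunks } from '@/web/core/dataset/api';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

// Returns at most 5 { q, a } pairs, since the handler slices chunks to 5.
async function previewLocalFile(dbFileId: string) {
  return getPreviewChunks({
    type: ImportDataSourceEnum.fileLocal,
    sourceId: dbFileId, // GridFS file id returned by uploadFile2DB
    chunkSize: 512, // placeholder; the UI passes the embedding chunk size
    overlapRatio: 0.2, // placeholder; the UI passes chunkOverlapRatio
    customSplitChar: ''
  });
}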
@@ -32,7 +32,6 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput';
import dayjs from 'dayjs';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useLoading } from '@fastgpt/web/hooks/useLoading';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@/components/MyMenu';
@@ -62,11 +61,11 @@ import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSyncResultEnum } from '@fastgpt/global/core/dataset/constants';
import MyBox from '@/components/common/MyBox';
import { ImportDataSourceEnum } from './Import';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
const FileSourceSelector = dynamic(() => import('./Import/sourceSelector/FileSourceSelector'), {});
const FileSourceSelector = dynamic(() => import('./Import/components/FileSourceSelector'), {});

const CollectionCard = () => {
  const BoxRef = useRef<HTMLDivElement>(null);
@@ -76,14 +75,14 @@ const CollectionCard = () => {
  const { toast } = useToast();
  const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string };
  const { t } = useTranslation();
  const { Loading } = useLoading();
  const { isPc } = useSystemStore();
  const { userInfo } = useUserStore();
  const [searchText, setSearchText] = useState('');
  const { datasetDetail, updateDataset, startWebsiteSync, loadDatasetDetail } = useDatasetStore();

  const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
    content: t('dataset.Confirm to delete the file')
    content: t('dataset.Confirm to delete the file'),
    type: 'delete'
  });
  const { openConfirm: openSyncConfirm, ConfirmModal: ConfirmSyncModal } = useConfirm({
    content: t('core.dataset.collection.Start Sync Tip')
@@ -452,7 +451,7 @@ const CollectionCard = () => {
          query: {
            ...router.query,
            currentTab: TabEnum.import,
            source: ImportDataSourceEnum.tableLocal
            source: ImportDataSourceEnum.csvTable
          }
        })
      }
@@ -1,6 +1,5 @@
import React, { useContext, useCallback, createContext, useState, useMemo, useEffect } from 'react';
import React, { useContext, createContext, useState, useMemo, useEffect } from 'react';

import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
@@ -8,6 +7,7 @@ import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

type ChunkSizeFieldType = 'embeddingChunkSize';
export type FormType = {
@@ -29,14 +29,11 @@ type useImportStoreType = {
  showPromptInput: boolean;
  sources: ImportSourceItemType[];
  setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
  showRePreview: boolean;
  totalChunkChars: number;
  totalChunks: number;
  chunkSize: number;
  predictPoints: number;
  chunkOverlapRatio: number;
  priceTip: string;
  uploadRate: number;
  splitSources2Chunks: () => void;
  importSource: `${ImportDataSourceEnum}`;
};
const StateContext = createContext<useImportStoreType>({
  processParamsForm: {} as any,
@@ -49,23 +46,22 @@ const StateContext = createContext<useImportStoreType>({
  showChunkInput: false,
  showPromptInput: false,
  chunkSizeField: 'embeddingChunkSize',
  showRePreview: false,
  totalChunkChars: 0,
  totalChunks: 0,
  chunkSize: 0,
  predictPoints: 0,
  chunkOverlapRatio: 0,
  priceTip: '',
  uploadRate: 50,
  splitSources2Chunks: () => {}
  importSource: ImportDataSourceEnum.fileLocal
});

export const useImportStore = () => useContext(StateContext);

const Provider = ({
  importSource,
  dataset,
  parentId,
  children
}: {
  importSource: `${ImportDataSourceEnum}`;
  dataset: DatasetItemType;
  parentId?: string;
  children: React.ReactNode;
@@ -86,7 +82,6 @@ const Provider = ({

  const { t } = useTranslation();
  const [sources, setSources] = useState<ImportSourceItemType[]>([]);
  const [showRePreview, setShowRePreview] = useState(false);

  // watch form
  const mode = processParamsForm.watch('mode');
@@ -154,68 +149,15 @@ const Provider = ({

  const chunkSize = wayStaticPrams[way].chunkSize;

  useEffect(() => {
    setShowRePreview(true);
  }, [mode, way, chunkSize, customSplitChar]);

  const totalChunkChars = useMemo(
    () => sources.reduce((sum, file) => sum + file.chunkChars, 0),
    [sources]
  );
  const predictPoints = useMemo(() => {
    const totalTokensPredict = totalChunkChars / 1000;
    if (mode === TrainingModeEnum.auto) {
      const price = totalTokensPredict * 1.3 * agentModel.charsPointsPrice;
      return +price.toFixed(2);
    }
    if (mode === TrainingModeEnum.qa) {
      const price = totalTokensPredict * 1.2 * agentModel.charsPointsPrice;
      return +price.toFixed(2);
    }

    return +(totalTokensPredict * vectorModel.charsPointsPrice).toFixed(2);
  }, [agentModel.charsPointsPrice, mode, totalChunkChars, vectorModel.charsPointsPrice]);
  const totalChunks = useMemo(
    () => sources.reduce((sum, file) => sum + file.chunks.length, 0),
    [sources]
  );

  const splitSources2Chunks = useCallback(() => {
    setSources((state) =>
      state.map((file) => {
        const { chunks, chars } = splitText2Chunks({
          text: file.rawText,
          chunkLen: chunkSize,
          overlapRatio: selectModelStaticParam.chunkOverlapRatio,
          customReg: customSplitChar ? [customSplitChar] : []
        });

        return {
          ...file,
          chunkChars: chars,
          chunks: chunks.map((chunk, i) => ({
            chunkIndex: i,
            q: chunk,
            a: ''
          }))
        };
      })
    );
    setShowRePreview(false);
  }, [chunkSize, customSplitChar, selectModelStaticParam.chunkOverlapRatio]);

  const value = {
  const value: useImportStoreType = {
    parentId,
    processParamsForm,
    ...selectModelStaticParam,
    sources,
    setSources,
    showRePreview,
    totalChunkChars,
    totalChunks,
    chunkSize,
    predictPoints,
    splitSources2Chunks

    importSource
  };
  return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
};
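The removed estimate above charged auto mode at 1.3x and QA mode at 1.2x the agent model's per-1k-chars price, and plain embedding at 1x the vector model's price. A standalone sketch of that arithmetic (the parameter names mirror the charsPointsPrice fields used above; the concrete numbers in the example are made up for illustration):

// Illustrative re-statement of the removed predictPoints calculation.
const estimatePoints = (
  totalChunkChars: number,
  mode: 'auto' | 'qa' | 'chunk',
  agentCharsPointsPrice: number,
  vectorCharsPointsPrice: number
) => {
  const totalTokensPredict = totalChunkChars / 1000;
  if (mode === 'auto') return +(totalTokensPredict * 1.3 * agentCharsPointsPrice).toFixed(2);
  if (mode === 'qa') return +(totalTokensPredict * 1.2 * agentCharsPointsPrice).toFixed(2);
  return +(totalTokensPredict * vectorCharsPointsPrice).toFixed(2);
};

// e.g. 120_000 chars in QA mode at 2 points per 1k chars:
// estimatePoints(120_000, 'qa', 2, 0.2) === 288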
@ -1,4 +1,4 @@
|
|||
import React, { useEffect, useMemo, useRef, useState } from 'react';
|
||||
import React, { useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
|
|
@ -21,11 +21,11 @@ import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
|||
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { useImportStore } from '../Provider';
|
||||
import Tag from '@/components/Tag';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import Preview from '../components/Preview';
|
||||
import Tag from '@/components/Tag';
|
||||
|
||||
function DataProcess({
|
||||
showPreviewChunks = true,
|
||||
|
|
@ -38,17 +38,11 @@ function DataProcess({
|
|||
const { feConfigs } = useSystemStore();
|
||||
const {
|
||||
processParamsForm,
|
||||
sources,
|
||||
chunkSizeField,
|
||||
minChunkSize,
|
||||
showChunkInput,
|
||||
showPromptInput,
|
||||
maxChunkSize,
|
||||
totalChunkChars,
|
||||
totalChunks,
|
||||
predictPoints,
|
||||
showRePreview,
|
||||
splitSources2Chunks,
|
||||
priceTip
|
||||
} = useImportStore();
|
||||
const { getValues, setValue, register } = processParamsForm;
|
||||
|
|
@ -69,16 +63,10 @@ function DataProcess({
|
|||
});
|
||||
}, [feConfigs?.isPlus]);
|
||||
|
||||
useEffect(() => {
|
||||
if (showPreviewChunks) {
|
||||
splitSources2Chunks();
|
||||
}
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Box h={'100%'} display={['block', 'flex']} gap={5}>
|
||||
<Box flex={'1 0 0'} maxW={'600px'}>
|
||||
<Flex fontWeight={'bold'} alignItems={'center'}>
|
||||
<Box flex={'1 0 0'} minW={['auto', '540px']} maxW={'600px'}>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'common/settingLight'} w={'20px'} />
|
||||
<Box fontSize={'lg'}>{t('core.dataset.import.Data process params')}</Box>
|
||||
</Flex>
|
||||
|
|
@ -273,34 +261,18 @@ function DataProcess({
|
|||
}}
|
||||
></LeftRadio>
|
||||
</Flex>
|
||||
{showPreviewChunks && (
|
||||
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{t('core.dataset.Total chunks', { total: totalChunks })}
|
||||
</Tag>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{t('core.Total chars', { total: totalChunkChars })}
|
||||
</Tag>
|
||||
{feConfigs?.show_pay && (
|
||||
<MyTooltip label={priceTip}>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{t('core.dataset.import.Estimated points', { points: predictPoints })}
|
||||
</Tag>
|
||||
</MyTooltip>
|
||||
)}
|
||||
</Flex>
|
||||
)}
|
||||
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
|
||||
{showPreviewChunks && showRePreview && (
|
||||
<Button variant={'primaryOutline'} onClick={splitSources2Chunks}>
|
||||
{t('core.dataset.import.Re Preview')}
|
||||
</Button>
|
||||
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
|
||||
{feConfigs?.show_pay && (
|
||||
<MyTooltip label={priceTip}>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{priceTip}
|
||||
</Tag>
|
||||
</MyTooltip>
|
||||
)}
|
||||
</Flex>
|
||||
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
|
||||
<Button
|
||||
onClick={() => {
|
||||
if (showRePreview) {
|
||||
splitSources2Chunks();
|
||||
}
|
||||
goToNext();
|
||||
}}
|
||||
>
|
||||
|
|
@ -308,7 +280,9 @@ function DataProcess({
|
|||
</Button>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Preview sources={sources} showPreviewChunks={showPreviewChunks} />
|
||||
<Box flex={'1 0 0'} w={'0'}>
|
||||
<Preview showPreviewChunks={showPreviewChunks} />
|
||||
</Box>
|
||||
|
||||
{isOpenCustomPrompt && (
|
||||
<PromptTextarea
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import React from 'react';
|
||||
import { useImportStore } from '../Provider';
|
||||
import Preview from '../components/Preview';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
|
|
@ -12,12 +11,11 @@ const PreviewData = ({
|
|||
goToNext: () => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { sources, setSources } = useImportStore();
|
||||
|
||||
return (
|
||||
<Flex flexDirection={'column'} h={'100%'}>
|
||||
<Box flex={'1 0 0 '}>
|
||||
<Preview showPreviewChunks={showPreviewChunks} sources={sources} />
|
||||
<Preview showPreviewChunks={showPreviewChunks} />
|
||||
</Box>
|
||||
<Flex mt={2} justifyContent={'flex-end'}>
|
||||
<Button onClick={goToNext}>{t('common.Next Step')}</Button>
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import React, { useEffect, useState } from 'react';
|
||||
import React from 'react';
|
||||
import {
|
||||
Box,
|
||||
TableContainer,
|
||||
|
|
@ -8,164 +8,109 @@ import {
|
|||
Th,
|
||||
Td,
|
||||
Tbody,
|
||||
Progress,
|
||||
Flex,
|
||||
Button
|
||||
} from '@chakra-ui/react';
|
||||
import { useImportStore, type FormType } from '../Provider';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
|
||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { chunksUpload, fileCollectionCreate } from '@/web/core/dataset/utils';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { useRouter } from 'next/router';
|
||||
import { TabEnum } from '../../../index';
|
||||
import { postCreateDatasetLinkCollection, postDatasetCollection } from '@/web/core/dataset/api';
|
||||
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { checkTeamDatasetSizeLimit } from '@/web/support/user/team/api';
|
||||
import {
|
||||
postCreateDatasetCsvTableCollection,
|
||||
postCreateDatasetFileCollection,
|
||||
postCreateDatasetLinkCollection,
|
||||
postCreateDatasetTextCollection
|
||||
} from '@/web/core/dataset/api';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import Tag from '@/components/Tag';
|
||||
|
||||
const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
const Upload = () => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const router = useRouter();
|
||||
const { datasetDetail } = useDatasetStore();
|
||||
const { parentId, sources, processParamsForm, chunkSize, totalChunks, uploadRate } =
|
||||
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
|
||||
useImportStore();
|
||||
const [uploadList, setUploadList] = useState<
|
||||
(ImportSourceItemType & {
|
||||
uploadedFileRate: number;
|
||||
uploadedChunksRate: number;
|
||||
})[]
|
||||
>([]);
|
||||
|
||||
const { handleSubmit } = processParamsForm;
|
||||
|
||||
const { mutate: startUpload, isLoading } = useRequest({
|
||||
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => {
|
||||
if (uploadList.length === 0) return;
|
||||
|
||||
await checkTeamDatasetSizeLimit(totalChunks);
|
||||
|
||||
let totalInsertion = 0;
|
||||
if (sources.length === 0) return;
|
||||
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
|
||||
|
||||
// Batch create collection and upload chunks
|
||||
for await (const item of uploadList) {
|
||||
// create collection
|
||||
const collectionId = await (async () => {
|
||||
const commonParams = {
|
||||
parentId,
|
||||
trainingType: mode,
|
||||
datasetId: datasetDetail._id,
|
||||
chunkSize,
|
||||
chunkSplitter: customSplitChar,
|
||||
qaPrompt,
|
||||
|
||||
name: item.sourceName,
|
||||
rawTextLength: item.rawText.length,
|
||||
hashRawText: hashStr(item.rawText)
|
||||
};
|
||||
if (item.file) {
|
||||
return fileCollectionCreate({
|
||||
file: item.file,
|
||||
data: {
|
||||
...commonParams,
|
||||
collectionMetadata: {
|
||||
relatedImgId: item.id
|
||||
for await (const item of filterWaitingSources) {
|
||||
setSources((state) =>
|
||||
state.map((source) =>
|
||||
source.id === item.id
|
||||
? {
|
||||
...source,
|
||||
createStatus: 'creating'
|
||||
}
|
||||
},
|
||||
percentListen: (e) => {
|
||||
setUploadList((state) =>
|
||||
state.map((uploadItem) =>
|
||||
uploadItem.id === item.id
|
||||
? {
|
||||
...uploadItem,
|
||||
uploadedFileRate: e
|
||||
}
|
||||
: uploadItem
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
} else if (item.link) {
|
||||
const { collectionId } = await postCreateDatasetLinkCollection({
|
||||
...commonParams,
|
||||
link: item.link,
|
||||
metadata: {
|
||||
webPageSelector: webSelector
|
||||
}
|
||||
});
|
||||
setUploadList((state) =>
|
||||
state.map((uploadItem) =>
|
||||
uploadItem.id === item.id
|
||||
? {
|
||||
...uploadItem,
|
||||
uploadedFileRate: 100
|
||||
}
|
||||
: uploadItem
|
||||
)
|
||||
);
|
||||
return collectionId;
|
||||
} else if (item.rawText) {
|
||||
// manual collection
|
||||
return postDatasetCollection({
|
||||
...commonParams,
|
||||
type: DatasetCollectionTypeEnum.virtual
|
||||
});
|
||||
}
|
||||
return '';
|
||||
})();
|
||||
: source
|
||||
)
|
||||
);
|
||||
|
||||
if (!collectionId) continue;
|
||||
if (item.link) continue;
|
||||
// create collection
|
||||
const commonParams = {
|
||||
parentId,
|
||||
trainingType: mode,
|
||||
datasetId: datasetDetail._id,
|
||||
chunkSize,
|
||||
chunkSplitter: customSplitChar,
|
||||
qaPrompt,
|
||||
|
||||
const billId = await postCreateTrainingUsage({
|
||||
name: item.sourceName,
|
||||
datasetId: datasetDetail._id
|
||||
});
|
||||
name: item.sourceName
|
||||
};
|
||||
if (importSource === ImportDataSourceEnum.fileLocal && item.dbFileId) {
|
||||
await postCreateDatasetFileCollection({
|
||||
...commonParams,
|
||||
fileId: item.dbFileId
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.fileLink && item.link) {
|
||||
await postCreateDatasetLinkCollection({
|
||||
...commonParams,
|
||||
link: item.link,
|
||||
metadata: {
|
||||
webPageSelector: webSelector
|
||||
}
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.fileCustom && item.rawText) {
|
||||
// manual collection
|
||||
await postCreateDatasetTextCollection({
|
||||
...commonParams,
|
||||
text: item.rawText
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
|
||||
await postCreateDatasetCsvTableCollection({
|
||||
...commonParams,
|
||||
fileId: item.dbFileId
|
||||
});
|
||||
}
|
||||
|
||||
// upload chunks
|
||||
const chunks = item.chunks;
|
||||
const { insertLen } = await chunksUpload({
|
||||
collectionId,
|
||||
billId,
|
||||
trainingMode: mode,
|
||||
chunks,
|
||||
rate: uploadRate,
|
||||
onUploading: (e) => {
|
||||
setUploadList((state) =>
|
||||
state.map((uploadItem) =>
|
||||
uploadItem.id === item.id
|
||||
? {
|
||||
...uploadItem,
|
||||
uploadedChunksRate: e
|
||||
}
|
||||
: uploadItem
|
||||
)
|
||||
);
|
||||
},
|
||||
prompt: qaPrompt
|
||||
});
|
||||
totalInsertion += insertLen;
|
||||
setSources((state) =>
|
||||
state.map((source) =>
|
||||
source.id === item.id
|
||||
? {
|
||||
...source,
|
||||
createStatus: 'finish'
|
||||
}
|
||||
: source
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return totalInsertion;
|
||||
},
|
||||
onSuccess(num) {
|
||||
if (showPreviewChunks) {
|
||||
toast({
|
||||
title: t('core.dataset.import.Import Success Tip', { num }),
|
||||
status: 'success'
|
||||
});
|
||||
} else {
|
||||
toast({
|
||||
title: t('core.dataset.import.Upload success'),
|
||||
status: 'success'
|
||||
});
|
||||
}
|
||||
onSuccess() {
|
||||
toast({
|
||||
title: t('core.dataset.import.Import success'),
|
||||
status: 'success'
|
||||
});
|
||||
|
||||
// close import page
|
||||
router.replace({
|
||||
|
|
@ -175,21 +120,21 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
|||
}
|
||||
});
|
||||
},
|
||||
onError() {
|
||||
setSources((state) =>
|
||||
state.map((source) =>
|
||||
source.createStatus === 'creating'
|
||||
? {
|
||||
...source,
|
||||
createStatus: 'waiting'
|
||||
}
|
||||
: source
|
||||
)
|
||||
);
|
||||
},
|
||||
errorToast: t('common.file.Upload failed')
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
setUploadList(
|
||||
sources.map((item) => {
|
||||
return {
|
||||
...item,
|
||||
uploadedFileRate: item.file ? 0 : -1,
|
||||
uploadedChunksRate: 0
|
||||
};
|
||||
})
|
||||
);
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<TableContainer>
|
||||
|
|
@ -199,85 +144,35 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
|||
<Th borderLeftRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Source name')}
|
||||
</Th>
|
||||
{showPreviewChunks ? (
|
||||
<>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.Chunk amount')}
|
||||
</Th>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload file progress')}
|
||||
</Th>
|
||||
<Th borderRightRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Data file progress')}
|
||||
</Th>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload status')}
|
||||
</Th>
|
||||
</>
|
||||
)}
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload status')}
|
||||
</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody>
|
||||
{uploadList.map((item) => (
|
||||
{sources.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td display={'flex'} alignItems={'center'}>
|
||||
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
|
||||
{item.sourceName}
|
||||
<Td>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
|
||||
<Box whiteSpace={'wrap'} maxW={'30vw'}>
|
||||
{item.sourceName}
|
||||
</Box>
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>
|
||||
<Box display={'inline-block'}>
|
||||
{item.createStatus === 'waiting' && (
|
||||
<Tag colorSchema={'gray'}>{t('common.Waiting')}</Tag>
|
||||
)}
|
||||
{item.createStatus === 'creating' && (
|
||||
<Tag colorSchema={'blue'}>{t('common.Creating')}</Tag>
|
||||
)}
|
||||
{item.createStatus === 'finish' && (
|
||||
<Tag colorSchema={'green'}>{t('common.Finish')}</Tag>
|
||||
)}
|
||||
</Box>
|
||||
</Td>
|
||||
{showPreviewChunks ? (
|
||||
<>
|
||||
<Td>{item.chunks.length}</Td>
|
||||
<Td>
|
||||
{item.uploadedFileRate === -1 ? (
|
||||
'-'
|
||||
) : (
|
||||
<Flex alignItems={'center'} fontSize={'xs'}>
|
||||
<Progress
|
||||
value={item.uploadedFileRate}
|
||||
h={'6px'}
|
||||
w={'100%'}
|
||||
maxW={'210px'}
|
||||
size="sm"
|
||||
borderRadius={'20px'}
|
||||
colorScheme={'blue'}
|
||||
bg="myGray.200"
|
||||
hasStripe
|
||||
isAnimated
|
||||
mr={2}
|
||||
/>
|
||||
{`${item.uploadedFileRate}%`}
|
||||
</Flex>
|
||||
)}
|
||||
</Td>
|
||||
<Td>
|
||||
<Flex alignItems={'center'} fontSize={'xs'}>
|
||||
<Progress
|
||||
value={item.uploadedChunksRate}
|
||||
h={'6px'}
|
||||
w={'100%'}
|
||||
maxW={'210px'}
|
||||
size="sm"
|
||||
borderRadius={'20px'}
|
||||
colorScheme={'purple'}
|
||||
bg="myGray.200"
|
||||
hasStripe
|
||||
isAnimated
|
||||
mr={2}
|
||||
/>
|
||||
{`${item.uploadedChunksRate}%`}
|
||||
</Flex>
|
||||
</Td>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Td color={item.uploadedFileRate === 100 ? 'green.600' : 'myGray.600'}>
|
||||
{item.uploadedFileRate === 100 ? t('common.Finish') : t('common.Waiting')}
|
||||
</Td>
|
||||
</>
|
||||
)}
|
||||
</Tr>
|
||||
))}
|
||||
</Tbody>
|
||||
|
|
@ -286,8 +181,8 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
|||
|
||||
<Flex justifyContent={'flex-end'} mt={4}>
|
||||
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
|
||||
{uploadList.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: uploadList.length })} | `
|
||||
{sources.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: sources.length })} | `
|
||||
: ''}
|
||||
{t('core.dataset.import.Start upload')}
|
||||
</Button>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,296 @@
|
|||
import MyBox from '@/components/common/MyBox';
|
||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { Box, FlexProps } from '@chakra-ui/react';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import React, { DragEvent, useCallback, useMemo, useState } from 'react';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import { uploadFile2DB } from '@/web/common/file/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
|
||||
export type SelectFileItemType = {
|
||||
fileId: string;
|
||||
folderPath: string;
|
||||
file: File;
|
||||
};
|
||||
|
||||
const FileSelector = ({
|
||||
fileType,
|
||||
selectFiles,
|
||||
setSelectFiles,
|
||||
onStartSelect,
|
||||
onFinishSelect,
|
||||
...props
|
||||
}: {
|
||||
fileType: string;
|
||||
selectFiles: ImportSourceItemType[];
|
||||
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
onStartSelect: () => void;
|
||||
onFinishSelect: () => void;
|
||||
} & FlexProps) => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
|
||||
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
|
||||
|
||||
const { File, onOpen } = useSelectFile({
|
||||
fileType,
|
||||
multiple: true,
|
||||
maxCount
|
||||
});
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const isMaxSelected = useMemo(
|
||||
() => selectFiles.length >= maxCount,
|
||||
[maxCount, selectFiles.length]
|
||||
);
|
||||
|
||||
const filterTypeReg = new RegExp(
|
||||
`(${fileType
|
||||
.split(',')
|
||||
.map((item) => item.trim())
|
||||
.join('|')})$`,
|
||||
'i'
|
||||
);
|
||||
|
||||
const { mutate: onSelectFile, isLoading } = useRequest({
|
||||
mutationFn: async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
onStartSelect();
|
||||
setSelectFiles((state) => {
|
||||
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
|
||||
const { fileId, file } = selectFile;
|
||||
|
||||
return {
|
||||
id: fileId,
|
||||
createStatus: 'waiting',
|
||||
file,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
isUploading: true,
|
||||
uploadedFileRate: 0
|
||||
};
|
||||
});
|
||||
const results = formatFiles.concat(state).slice(0, maxCount);
|
||||
return results;
|
||||
});
|
||||
try {
|
||||
// upload file
|
||||
await Promise.all(
|
||||
files.map(async ({ fileId, file }) => {
|
||||
const uploadFileId = await uploadFile2DB({
|
||||
file,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
percentListen: (e) => {
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
uploadedFileRate: e
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
dbFileId: uploadFileId,
|
||||
isUploading: false
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
})
|
||||
);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
onFinishSelect();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const selectFileCallback = useCallback(
|
||||
(files: SelectFileItemType[]) => {
|
||||
if (selectFiles.length + files.length > maxCount) {
|
||||
files = files.slice(0, maxCount - selectFiles.length);
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Some file count exceeds limit', { maxCount })
|
||||
});
|
||||
}
|
||||
// size check
|
||||
if (!maxSize) {
|
||||
return onSelectFile(files);
|
||||
}
|
||||
const filterFiles = files.filter((item) => item.file.size <= maxSize);
|
||||
|
||||
if (filterFiles.length < files.length) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
|
||||
});
|
||||
}
|
||||
|
||||
return onSelectFile(filterFiles);
|
||||
},
|
||||
[maxCount, maxSize, onSelectFile, selectFiles.length, t, toast]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(true);
|
||||
};
|
||||
|
||||
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
};
|
||||
|
||||
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
|
||||
const items = e.dataTransfer.items;
|
||||
const fileList: SelectFileItemType[] = [];
|
||||
|
||||
if (e.dataTransfer.items.length <= 1) {
|
||||
const traverseFileTree = async (item: any) => {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
if (item.isFile) {
|
||||
item.file((file: File) => {
|
||||
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
|
||||
|
||||
if (filterTypeReg.test(file.name)) {
|
||||
fileList.push({
|
||||
fileId: getNanoid(),
|
||||
folderPath,
|
||||
file
|
||||
});
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
} else if (item.isDirectory) {
|
||||
const dirReader = item.createReader();
|
||||
dirReader.readEntries(async (entries: any[]) => {
|
||||
for (let i = 0; i < entries.length; i++) {
|
||||
await traverseFileTree(entries[i]);
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
for await (const item of items) {
|
||||
await traverseFileTree(item.webkitGetAsEntry());
|
||||
}
|
||||
} else {
|
||||
const files = Array.from(e.dataTransfer.files);
|
||||
let isErr = files.some((item) => item.type === '');
|
||||
if (isErr) {
|
||||
return toast({
|
||||
title: t('file.upload error description'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
|
||||
fileList.push(
|
||||
...files
|
||||
.filter((item) => filterTypeReg.test(item.name))
|
||||
.map((file) => ({
|
||||
fileId: getNanoid(),
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
selectFileCallback(fileList.slice(0, maxCount));
|
||||
};
|
||||
|
||||
return (
|
||||
<MyBox
|
||||
isLoading={isLoading}
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
px={3}
|
||||
py={[4, 7]}
|
||||
borderWidth={'1.5px'}
|
||||
borderStyle={'dashed'}
|
||||
borderRadius={'md'}
|
||||
{...(isMaxSelected
|
||||
? {}
|
||||
: {
|
||||
cursor: 'pointer',
|
||||
_hover: {
|
||||
bg: 'primary.50',
|
||||
borderColor: 'primary.600'
|
||||
},
|
||||
borderColor: isDragging ? 'primary.600' : 'borderColor.high',
|
||||
onDragEnter: handleDragEnter,
|
||||
onDragOver: (e) => e.preventDefault(),
|
||||
onDragLeave: handleDragLeave,
|
||||
onDrop: handleDrop,
|
||||
onClick: onOpen
|
||||
})}
|
||||
{...props}
|
||||
>
|
||||
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
|
||||
{isMaxSelected ? (
|
||||
<>
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
已达到最大文件数量
|
||||
</Box>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Box fontWeight={'bold'}>
|
||||
{isDragging
|
||||
? t('file.Release the mouse to upload the file')
|
||||
: t('common.file.Select and drag file tip')}
|
||||
</Box>
|
||||
{/* file type */}
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{t('common.file.Support file type', { fileType })}
|
||||
</Box>
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{/* max count */}
|
||||
{maxCount && t('common.file.Support max count', { maxCount })}
|
||||
{/* max size */}
|
||||
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
|
||||
</Box>
|
||||
|
||||
<File
|
||||
onSelect={(files) =>
|
||||
selectFileCallback(
|
||||
files.map((file) => ({
|
||||
fileId: getNanoid(),
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
)
|
||||
}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(FileSelector);
|
||||
|
|
@@ -3,9 +3,9 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
import { ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import { ImportDataSourceEnum } from '..';
import { useRouter } from 'next/router';
import { TabEnum } from '../../..';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

const FileModeSelector = ({ onClose }: { onClose: () => void }) => {
  const { t } = useTranslation();
@ -1,132 +1,94 @@
|
|||
import React, { useMemo, useState } from 'react';
|
||||
import { Box, Flex } from '@chakra-ui/react';
|
||||
import React, { useState } from 'react';
|
||||
import { Box, Flex, IconButton } from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
|
||||
import RowTabs from '@fastgpt/web/components/common/Tabs/RowTabs';
|
||||
import { useImportStore } from '../Provider';
|
||||
import MyMenu from '@/components/MyMenu';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import dynamic from 'next/dynamic';
|
||||
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
|
||||
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
|
||||
|
||||
enum PreviewListEnum {
|
||||
chunks = 'chunks',
|
||||
sources = 'sources'
|
||||
}
|
||||
|
||||
const Preview = ({
|
||||
sources,
|
||||
showPreviewChunks
|
||||
}: {
|
||||
sources: ImportSourceItemType[];
|
||||
showPreviewChunks: boolean;
|
||||
}) => {
|
||||
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
const { t } = useTranslation();
|
||||
const [previewListType, setPreviewListType] = useState(
|
||||
showPreviewChunks ? PreviewListEnum.chunks : PreviewListEnum.sources
|
||||
);
|
||||
|
||||
const chunks = useMemo(() => {
|
||||
const oneSourceChunkLength = Math.max(4, Math.floor(50 / sources.length));
|
||||
return sources
|
||||
.map((source) =>
|
||||
source.chunks.slice(0, oneSourceChunkLength).map((chunk, i) => ({
|
||||
...chunk,
|
||||
index: i + 1,
|
||||
sourceName: source.sourceName,
|
||||
sourceIcon: source.icon
|
||||
}))
|
||||
)
|
||||
.flat();
|
||||
}, [sources]);
|
||||
const { sources } = useImportStore();
|
||||
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
|
||||
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();
|
||||
|
||||
return (
|
||||
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'} flex={'1 0 0'}>
|
||||
<Box>
|
||||
<RowTabs
|
||||
list={[
|
||||
...(showPreviewChunks
|
||||
? [
|
||||
{
|
||||
icon: 'common/viewLight',
|
||||
label: t('core.dataset.import.Preview chunks'),
|
||||
value: PreviewListEnum.chunks
|
||||
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'}>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
|
||||
<Box fontSize={'lg'}>{t('core.dataset.import.Sources list')}</Box>
|
||||
</Flex>
|
||||
<Box mt={3} flex={'1 0 0'} width={'100%'} overflow={'auto'}>
|
||||
{sources.map((source) => (
|
||||
<Flex
|
||||
key={source.id}
|
||||
bg={'white'}
|
||||
p={4}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
mb={3}
|
||||
alignItems={'center'}
|
||||
>
|
||||
<MyIcon name={source.icon as any} w={'16px'} />
|
||||
<Box mx={1} flex={'1 0 0'} w={0} className="textEllipsis">
|
||||
{source.sourceName}
|
||||
</Box>
|
||||
{showPreviewChunks && (
|
||||
<Box fontSize={'xs'} color={'myGray.600'}>
|
||||
<MyMenu
|
||||
Button={
|
||||
<IconButton
|
||||
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
|
||||
aria-label={''}
|
||||
size={'sm'}
|
||||
variant={'whitePrimary'}
|
||||
/>
|
||||
}
|
||||
]
|
||||
: []),
|
||||
{
|
||||
icon: 'core/dataset/fileCollection',
|
||||
label: t('core.dataset.import.Sources list'),
|
||||
value: PreviewListEnum.sources
|
||||
}
|
||||
]}
|
||||
value={previewListType}
|
||||
onChange={(e) => setPreviewListType(e as PreviewListEnum)}
|
||||
/>
|
||||
</Box>
|
||||
<Box mt={3} flex={'1 0 0'} overflow={'auto'}>
|
||||
{previewListType === PreviewListEnum.chunks ? (
|
||||
<>
|
||||
{chunks.map((chunk, i) => (
|
||||
<Box
|
||||
key={i}
|
||||
p={4}
|
||||
bg={'white'}
|
||||
mb={3}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
>
|
||||
<Flex mb={1} alignItems={'center'} fontSize={'sm'}>
|
||||
<Box
|
||||
flexShrink={0}
|
||||
px={1}
|
||||
color={'primary.600'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'primary.200'}
|
||||
bg={'primary.50'}
|
||||
borderRadius={'sm'}
|
||||
>
|
||||
# {chunk.index}
|
||||
</Box>
|
||||
<Flex ml={2} fontWeight={'bold'} alignItems={'center'} gap={1}>
|
||||
<MyIcon name={chunk.sourceIcon as any} w={'14px'} />
|
||||
{chunk.sourceName}
|
||||
</Flex>
|
||||
</Flex>
|
||||
<Box fontSize={'xs'} whiteSpace={'pre-wrap'} wordBreak={'break-all'}>
|
||||
<Box color={'myGray.900'}>{chunk.q}</Box>
|
||||
<Box color={'myGray.500'}>{chunk.a}</Box>
|
||||
</Box>
|
||||
menuList={[
|
||||
{
|
||||
label: (
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
|
||||
{t('core.dataset.import.Preview raw text')}
|
||||
</Flex>
|
||||
),
|
||||
onClick: () => setPreviewRawTextSource(source)
|
||||
},
|
||||
{
|
||||
label: (
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
|
||||
{t('core.dataset.import.Preview chunks')}
|
||||
</Flex>
|
||||
),
|
||||
onClick: () => setPreviewChunkSource(source)
|
||||
}
|
||||
]}
|
||||
/>
|
||||
</Box>
|
||||
))}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
{sources.map((source) => (
|
||||
<Flex
|
||||
key={source.id}
|
||||
bg={'white'}
|
||||
p={4}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
mb={3}
|
||||
>
|
||||
<MyIcon name={source.icon as any} w={'16px'} />
|
||||
<Box mx={1} flex={'1 0 0'} className="textEllipsis">
|
||||
{source.sourceName}
|
||||
</Box>
|
||||
{showPreviewChunks && (
|
||||
<Box>
|
||||
{t('core.dataset.import.File chunk amount', { amount: source.chunks.length })}
|
||||
</Box>
|
||||
)}
|
||||
</Flex>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
)}
|
||||
</Flex>
|
||||
))}
|
||||
</Box>
|
||||
{!!previewRawTextSource && (
|
||||
<PreviewRawText
|
||||
previewSource={previewRawTextSource}
|
||||
onClose={() => setPreviewRawTextSource(undefined)}
|
||||
/>
|
||||
)}
|
||||
{!!previewChunkSource && (
|
||||
<PreviewChunks
|
||||
previewSource={previewChunkSource}
|
||||
onClose={() => setPreviewChunkSource(undefined)}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
import React, { useMemo } from 'react';
|
||||
import { Box } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
|
||||
import { getPreviewChunks } from '@/web/core/dataset/api';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
const PreviewChunks = ({
|
||||
previewSource,
|
||||
onClose
|
||||
}: {
|
||||
previewSource: ImportSourceItemType;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const { toast } = useToast();
|
||||
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useImportStore();
|
||||
|
||||
const { data = [], isLoading } = useQuery(
|
||||
['previewSource'],
|
||||
() => {
|
||||
if (
|
||||
importSource === ImportDataSourceEnum.fileLocal ||
|
||||
importSource === ImportDataSourceEnum.csvTable ||
|
||||
importSource === ImportDataSourceEnum.fileLink
|
||||
) {
|
||||
return getPreviewChunks({
|
||||
type: importSource,
|
||||
sourceId: previewSource.dbFileId || previewSource.link || '',
|
||||
chunkSize,
|
||||
overlapRatio: chunkOverlapRatio,
|
||||
customSplitChar: processParamsForm.getValues('customSplitChar')
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.fileCustom) {
|
||||
const customSplitChar = processParamsForm.getValues('customSplitChar');
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: previewSource.rawText || '',
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: chunkOverlapRatio,
|
||||
customReg: customSplitChar ? [customSplitChar] : []
|
||||
});
|
||||
return chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: ''
|
||||
}));
|
||||
}
|
||||
return [];
|
||||
},
|
||||
{
|
||||
onError(err) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(err)
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
return (
|
||||
<MyRightDrawer
|
||||
onClose={onClose}
|
||||
iconSrc={previewSource.icon}
|
||||
title={previewSource.sourceName}
|
||||
isLoading={isLoading}
|
||||
maxW={['90vw', '40vw']}
|
||||
>
|
||||
{data.map((item, index) => (
|
||||
<Box
|
||||
key={index}
|
||||
whiteSpace={'pre-wrap'}
|
||||
fontSize={'sm'}
|
||||
p={4}
|
||||
bg={index % 2 === 0 ? 'white' : 'myWhite.600'}
|
||||
mb={3}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
_notLast={{
|
||||
mb: 2
|
||||
}}
|
||||
>
|
||||
<Box color={'myGray.900'}>{item.q}</Box>
|
||||
<Box color={'myGray.500'}>{item.a}</Box>
|
||||
</Box>
|
||||
))}
|
||||
</MyRightDrawer>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(PreviewChunks);
|
||||
|
|
@ -1,28 +1,73 @@
|
|||
import React from 'react';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { ModalBody } from '@chakra-ui/react';
|
||||
|
||||
export type PreviewRawTextProps = {
|
||||
icon: string;
|
||||
title: string;
|
||||
rawText: string;
|
||||
};
|
||||
import { Box } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { getPreviewFileContent } from '@/web/common/file/api';
|
||||
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
const PreviewRawText = ({
|
||||
icon,
|
||||
title,
|
||||
rawText,
|
||||
previewSource,
|
||||
onClose
|
||||
}: PreviewRawTextProps & {
|
||||
}: {
|
||||
previewSource: ImportSourceItemType;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const { toast } = useToast();
|
||||
const { importSource } = useImportStore();
|
||||
|
||||
const { data, isLoading } = useQuery(
|
||||
['previewSource', previewSource?.dbFileId],
|
||||
() => {
|
||||
if (importSource === ImportDataSourceEnum.fileLocal && previewSource.dbFileId) {
|
||||
return getPreviewFileContent({
|
||||
fileId: previewSource.dbFileId,
|
||||
csvFormat: true
|
||||
});
|
||||
}
|
||||
if (importSource === ImportDataSourceEnum.csvTable && previewSource.dbFileId) {
|
||||
return getPreviewFileContent({
|
||||
fileId: previewSource.dbFileId,
|
||||
csvFormat: false
|
||||
});
|
||||
}
|
||||
if (importSource === ImportDataSourceEnum.fileCustom) {
|
||||
return {
|
||||
previewContent: (previewSource.rawText || '').slice(0, 3000)
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
previewContent: ''
|
||||
};
|
||||
},
|
||||
{
|
||||
onError(err) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(err)
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
const rawText = data?.previewContent || '';
|
||||
|
||||
return (
|
||||
<MyModal isOpen onClose={onClose} iconSrc={icon} title={title}>
|
||||
<ModalBody whiteSpace={'pre-wrap'} overflowY={'auto'}>
|
||||
<MyRightDrawer
|
||||
onClose={onClose}
|
||||
iconSrc={previewSource.icon}
|
||||
title={previewSource.sourceName}
|
||||
isLoading={isLoading}
|
||||
>
|
||||
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} fontSize={'sm'}>
|
||||
{rawText}
|
||||
</ModalBody>
|
||||
</MyModal>
|
||||
</Box>
|
||||
</MyRightDrawer>
|
||||
);
|
||||
};
|
||||
|
||||
export default PreviewRawText;
|
||||
export default React.memo(PreviewRawText);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,119 @@
|
|||
import React, { useState } from 'react';
|
||||
import {
|
||||
Flex,
|
||||
TableContainer,
|
||||
Table,
|
||||
Thead,
|
||||
Tr,
|
||||
Th,
|
||||
Td,
|
||||
Tbody,
|
||||
Progress,
|
||||
IconButton
|
||||
} from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import dynamic from 'next/dynamic';
|
||||
|
||||
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
|
||||
|
||||
export const RenderUploadFiles = ({
|
||||
files,
|
||||
setFiles,
|
||||
showPreviewContent
|
||||
}: {
|
||||
files: ImportSourceItemType[];
|
||||
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
showPreviewContent?: boolean;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
|
||||
|
||||
return files.length > 0 ? (
|
||||
<>
|
||||
<TableContainer mt={5}>
|
||||
<Table variant={'simple'} fontSize={'sm'} draggable={false}>
|
||||
<Thead draggable={false}>
|
||||
<Tr bg={'myGray.100'} mb={2}>
|
||||
<Th borderLeftRadius={'md'} borderBottom={'none'} py={4}>
|
||||
{t('common.file.File Name')}
|
||||
</Th>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload file progress')}
|
||||
</Th>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('common.file.File Size')}
|
||||
</Th>
|
||||
<Th borderRightRadius={'md'} borderBottom={'none'} py={4}>
|
||||
{t('common.Action')}
|
||||
</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody>
|
||||
{files.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
|
||||
{item.sourceName}
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>
|
||||
<Flex alignItems={'center'} fontSize={'xs'}>
|
||||
<Progress
|
||||
value={item.uploadedFileRate}
|
||||
h={'6px'}
|
||||
w={'100%'}
|
||||
maxW={'210px'}
|
||||
size="sm"
|
||||
borderRadius={'20px'}
|
||||
colorScheme={(item.uploadedFileRate || 0) >= 100 ? 'green' : 'blue'}
|
||||
bg="myGray.200"
|
||||
hasStripe
|
||||
isAnimated
|
||||
mr={2}
|
||||
/>
|
||||
{`${item.uploadedFileRate}%`}
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>{item.sourceSize}</Td>
|
||||
<Td>
|
||||
{!item.isUploading && (
|
||||
<Flex alignItems={'center'} gap={4}>
|
||||
{showPreviewContent && (
|
||||
<MyTooltip label={t('core.dataset.import.Preview raw text')}>
|
||||
<IconButton
|
||||
variant={'whitePrimary'}
|
||||
size={'sm'}
|
||||
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
|
||||
aria-label={''}
|
||||
onClick={() => setPreviewFile(item)}
|
||||
/>
|
||||
</MyTooltip>
|
||||
)}
|
||||
|
||||
<IconButton
|
||||
variant={'grayDanger'}
|
||||
size={'sm'}
|
||||
icon={<MyIcon name={'delete'} w={'14px'} />}
|
||||
aria-label={''}
|
||||
onClick={() => {
|
||||
setFiles((state) => state.filter((file) => file.id !== item.id));
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
)}
|
||||
</Td>
|
||||
</Tr>
|
||||
))}
|
||||
</Tbody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
{!!previewFile && (
|
||||
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
|
||||
)}
|
||||
</>
|
||||
) : null;
|
||||
};
|
||||
|
|
@@ -1,4 +1,4 @@
import React, { useEffect } from 'react';
import React, { useCallback, useEffect } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';

import dynamic from 'next/dynamic';
@@ -19,7 +19,7 @@ const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
    <>
      {activeStep === 0 && <CustomTextInput goToNext={goToNext} />}
      {activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
      {activeStep === 2 && <Upload showPreviewChunks />}
      {activeStep === 2 && <Upload />}
    </>
  );
};
@@ -36,6 +36,24 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
    }
  });

  const onSubmit = useCallback(
    (data: { name: string; value: string }) => {
      const fileId = getNanoid(32);

      setSources([
        {
          id: fileId,
          createStatus: 'waiting',
          rawText: data.value,
          sourceName: data.name,
          icon: 'file/fill/manual'
        }
      ]);
      goToNext();
    },
    [goToNext, setSources]
  );

  useEffect(() => {
    const source = sources[0];
    if (source) {
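The manual-text flow above now stamps each source with a createStatus before it reaches the upload step. For orientation, this is roughly the shape of ImportSourceItemType as it is used across this commit; treat it as an approximation reconstructed from usage, not the actual definition in @/web/core/dataset/type.d:

// Approximate shape, inferred from how sources are built and consumed in this diff.
type ImportSourceItemTypeSketch = {
  id: string;
  createStatus: 'waiting' | 'creating' | 'finish';
  sourceName: string;
  icon: string;
  // optional fields, depending on the import source
  rawText?: string; // fileCustom and link sources
  link?: string; // fileLink sources
  file?: File; // local files before upload
  dbFileId?: string; // GridFS id returned by uploadFile2DB
  sourceSize?: string; // formatted, e.g. "1.2 MB"
  isUploading?: boolean;
  uploadedFileRate?: number; // 0-100 upload progress
};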
@ -78,25 +96,7 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
|
|||
/>
|
||||
</Box>
|
||||
<Flex mt={5} justifyContent={'flex-end'}>
|
||||
<Button
|
||||
onClick={handleSubmit((data) => {
|
||||
const fileId = getNanoid(32);
|
||||
|
||||
setSources([
|
||||
{
|
||||
id: fileId,
|
||||
rawText: data.value,
|
||||
chunks: [],
|
||||
chunkChars: 0,
|
||||
sourceName: data.name,
|
||||
icon: 'file/fill/manual'
|
||||
}
|
||||
]);
|
||||
goToNext();
|
||||
})}
|
||||
>
|
||||
{t('common.Next Step')}
|
||||
</Button>
|
||||
<Button onClick={handleSubmit((data) => onSubmit(data))}>{t('common.Next Step')}</Button>
|
||||
</Flex>
|
||||
</Box>
|
||||
);
|
||||
|
|
|
|||
|
|
@@ -23,7 +23,7 @@ const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
    <>
      {activeStep === 0 && <CustomLinkImport goToNext={goToNext} />}
      {activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />}
      {activeStep === 2 && <Upload showPreviewChunks={false} />}
      {activeStep === 2 && <Upload />}
    </>
  );
};
@@ -128,10 +128,8 @@ const CustomLinkImport = ({ goToNext }: { goToNext: () => void }) => {
    setSources(
      newLinkList.map((link) => ({
        id: getNanoid(32),
        createStatus: 'waiting',
        link,
        rawText: '',
        chunks: [],
        chunkChars: 0,
        sourceName: link,
        icon: LinkCollectionIcon
      }))
@ -1,41 +1,27 @@
|
|||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import React, { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button } from '@chakra-ui/react';
|
||||
import FileSelector from '../components/FileSelector';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { readFileRawContent } from '@fastgpt/web/common/file/read';
|
||||
import { getUploadBase64ImgController } from '@/web/common/file/controller';
|
||||
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import type { PreviewRawTextProps } from '../components/PreviewRawText';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
|
||||
import dynamic from 'next/dynamic';
|
||||
import Loading from '@fastgpt/web/components/common/MyLoading';
|
||||
import { RenderUploadFiles } from '../components/RenderFiles';
|
||||
|
||||
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
|
||||
loading: () => <Loading fixed={false} />
|
||||
});
|
||||
const Upload = dynamic(() => import('../commonProgress/Upload'));
|
||||
const PreviewRawText = dynamic(() => import('../components/PreviewRawText'));
|
||||
|
||||
type FileItemType = ImportSourceItemType & { file: File };
|
||||
const fileType = '.txt, .docx, .csv, .pdf, .md, .html';
|
||||
const maxSelectFileCount = 1000;
|
||||
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
|
||||
|
||||
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
|
||||
{activeStep === 2 && <Upload showPreviewChunks />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
|
@ -44,135 +30,47 @@ export default React.memo(FileLocal);
|
|||
|
||||
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
const { sources, setSources } = useImportStore();
|
||||
// @ts-ignore
|
||||
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
|
||||
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
|
||||
sources.map((source) => ({
|
||||
isUploading: false,
|
||||
...source
|
||||
}))
|
||||
);
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
|
||||
|
||||
const [previewRaw, setPreviewRaw] = useState<PreviewRawTextProps>();
|
||||
|
||||
useEffect(() => {
|
||||
setSources(successFiles);
|
||||
}, [successFiles]);
|
||||
}, [setSources, successFiles]);
|
||||
|
||||
const { mutate: onSelectFile, isLoading } = useRequest({
|
||||
mutationFn: async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
for await (const selectFile of files) {
|
||||
const { file, folderPath } = selectFile;
|
||||
const relatedId = getNanoid(32);
|
||||
|
||||
const { rawText } = await (() => {
|
||||
try {
|
||||
return readFileRawContent({
|
||||
file,
|
||||
uploadBase64Controller: (base64Img) =>
|
||||
getUploadBase64ImgController({
|
||||
base64Img,
|
||||
type: MongoImageTypeEnum.collectionImage,
|
||||
metadata: {
|
||||
relatedId
|
||||
}
|
||||
})
|
||||
});
|
||||
} catch (error) {
|
||||
return { rawText: '' };
|
||||
}
|
||||
})();
|
||||
|
||||
const item: FileItemType = {
|
||||
id: relatedId,
|
||||
file,
|
||||
rawText,
|
||||
chunks: [],
|
||||
chunkChars: 0,
|
||||
sourceFolderPath: folderPath,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
errorMsg: rawText.length === 0 ? t('common.file.Empty file tip') : ''
|
||||
};
|
||||
|
||||
setSelectFiles((state) => {
|
||||
const results = [item].concat(state).slice(0, maxSelectFileCount);
|
||||
return results;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
const onclickNext = useCallback(() => {
|
||||
// filter uploaded files
|
||||
setSelectFiles((state) => state.filter((item) => (item.uploadedFileRate || 0) >= 100));
|
||||
goToNext();
|
||||
}, [goToNext]);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<FileSelector
|
||||
isLoading={isLoading}
|
||||
fileType={fileType}
|
||||
multiple
|
||||
maxCount={maxSelectFileCount}
|
||||
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
|
||||
onSelectFile={onSelectFile}
|
||||
selectFiles={selectFiles}
|
||||
setSelectFiles={setSelectFiles}
|
||||
onStartSelect={() => setUploading(true)}
|
||||
onFinishSelect={() => setUploading(false)}
|
||||
/>
|
||||
|
||||
{/* render files */}
|
||||
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
|
||||
{selectFiles.map((item) => (
|
||||
<MyTooltip key={item.id} label={t('core.dataset.import.Preview raw text')}>
|
||||
<Flex
|
||||
alignItems={'center'}
|
||||
px={4}
|
||||
py={3}
|
||||
borderRadius={'md'}
|
||||
bg={'myGray.100'}
|
||||
cursor={'pointer'}
|
||||
onClick={() =>
|
||||
setPreviewRaw({
|
||||
icon: item.icon,
|
||||
title: item.sourceName,
|
||||
rawText: item.rawText.slice(0, 10000)
|
||||
})
|
||||
}
|
||||
>
|
||||
<MyIcon name={item.icon as any} w={'16px'} />
|
||||
<Box ml={1} mr={3}>
|
||||
{item.sourceName}
|
||||
</Box>
|
||||
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
|
||||
{item.sourceSize}
|
||||
{item.rawText.length > 0 && (
|
||||
<>,{t('common.Number of words', { amount: item.rawText.length })}</>
|
||||
)}
|
||||
</Box>
|
||||
{item.errorMsg && (
|
||||
<MyTooltip label={item.errorMsg}>
|
||||
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
|
||||
</MyTooltip>
|
||||
)}
|
||||
<MyIcon
|
||||
name={'common/closeLight'}
|
||||
w={'14px'}
|
||||
color={'myGray.500'}
|
||||
cursor={'pointer'}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
</MyTooltip>
|
||||
))}
|
||||
</Flex>
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
|
||||
|
||||
<Box textAlign={'right'}>
|
||||
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
|
||||
<Box textAlign={'right'} mt={5}>
|
||||
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
|
||||
{selectFiles.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
{t('common.Next Step')}
|
||||
</Button>
|
||||
</Box>
|
||||
|
||||
{previewRaw && <PreviewRawText {...previewRaw} onClose={() => setPreviewRaw(undefined)} />}
|
||||
</Box>
|
||||
);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,108 +1,62 @@
|
|||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button } from '@chakra-ui/react';
|
||||
import FileSelector from '../components/FileSelector';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
|
||||
import dynamic from 'next/dynamic';
|
||||
import { fileDownload } from '@/web/common/file/utils';
|
||||
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
|
||||
import { RenderUploadFiles } from '../components/RenderFiles';
|
||||
|
||||
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
|
||||
const Upload = dynamic(() => import('../commonProgress/Upload'));
|
||||
|
||||
type FileItemType = ImportSourceItemType & { file: File };
|
||||
const fileType = '.csv';
|
||||
const maxSelectFileCount = 1000;
|
||||
|
||||
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
|
||||
{activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />}
|
||||
{activeStep === 2 && <Upload showPreviewChunks />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(FileLocal);
|
||||
|
||||
const csvTemplate = `index,content
|
||||
"必填内容","可选内容。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
|
||||
const csvTemplate = `"第一列内容","第二列内容"
|
||||
"必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
|
||||
"只会讲第一和第二列内容导入,其余列会被忽略",""
|
||||
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段,即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
|
||||
"AIGC发展分为几个阶段?","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
|
||||
|
||||
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
const { sources, setSources } = useImportStore();
|
||||
// @ts-ignore
|
||||
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
|
||||
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
|
||||
sources.map((source) => ({
|
||||
isUploading: false,
|
||||
...source
|
||||
}))
|
||||
);
|
||||
const [uploading, setUploading] = useState(false);
|
||||
|
||||
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
|
||||
|
||||
useEffect(() => {
|
||||
setSources(successFiles);
|
||||
}, [successFiles]);
|
||||
|
||||
const { mutate: onSelectFile, isLoading } = useRequest({
|
||||
mutationFn: async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
for await (const selectFile of files) {
|
||||
const { file, folderPath } = selectFile;
|
||||
const { header, data } = await readCsvContent({ file });
|
||||
|
||||
const filterData: FileItemType['chunks'] = data
|
||||
.filter((item) => item[0])
|
||||
.map((item) => ({
|
||||
q: item[0] || '',
|
||||
a: item[1] || '',
|
||||
chunkIndex: 0
|
||||
}));
|
||||
|
||||
const item: FileItemType = {
|
||||
id: getNanoid(32),
|
||||
file,
|
||||
rawText: '',
|
||||
chunks: filterData,
|
||||
chunkChars: 0,
|
||||
sourceFolderPath: folderPath,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
errorMsg:
|
||||
header[0] !== 'index' || header[1] !== 'content' || filterData.length === 0
|
||||
? t('core.dataset.import.Csv format error')
|
||||
: ''
|
||||
};
|
||||
|
||||
setSelectFiles((state) => {
|
||||
const results = [item].concat(state).slice(0, 10);
|
||||
return results;
|
||||
});
|
||||
}
|
||||
}
|
||||
},
|
||||
errorToast: t('common.file.Select failed')
|
||||
});
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<FileSelector
|
||||
multiple
|
||||
maxCount={maxSelectFileCount}
|
||||
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
|
||||
isLoading={isLoading}
|
||||
fileType={fileType}
|
||||
onSelectFile={onSelectFile}
|
||||
selectFiles={selectFiles}
|
||||
setSelectFiles={setSelectFiles}
|
||||
onStartSelect={() => setUploading(true)}
|
||||
onFinishSelect={() => setUploading(false)}
|
||||
/>
|
||||
|
||||
<Box
|
||||
|
|
@ -122,43 +76,16 @@ const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () =
|
|||
</Box>
|
||||
|
||||
{/* render files */}
|
||||
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
|
||||
{selectFiles.map((item) => (
|
||||
<Flex
|
||||
key={item.id}
|
||||
alignItems={'center'}
|
||||
px={4}
|
||||
py={2}
|
||||
borderRadius={'md'}
|
||||
bg={'myGray.100'}
|
||||
>
|
||||
<MyIcon name={item.icon as any} w={'16px'} />
|
||||
<Box ml={1} mr={3}>
|
||||
{item.sourceName}
|
||||
</Box>
|
||||
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
|
||||
{item.sourceSize}
|
||||
</Box>
|
||||
{item.errorMsg && (
|
||||
<MyTooltip label={item.errorMsg}>
|
||||
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
|
||||
</MyTooltip>
|
||||
)}
|
||||
<MyIcon
|
||||
name={'common/closeLight'}
|
||||
w={'14px'}
|
||||
color={'myGray.500'}
|
||||
cursor={'pointer'}
|
||||
onClick={() => {
|
||||
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
))}
|
||||
</Flex>
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
|
||||
|
||||
<Box textAlign={'right'}>
|
||||
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
|
||||
<Box textAlign={'right'} mt={5}>
|
||||
<Button
|
||||
isDisabled={successFiles.length === 0 || uploading}
|
||||
onClick={() => {
|
||||
setSelectFiles((state) => state.filter((item) => !item.errorMsg));
|
||||
goToNext();
|
||||
}}
|
||||
>
|
||||
{selectFiles.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
|
|
|
|||
|
|
@ -6,22 +6,15 @@ import { useRouter } from 'next/router';
|
|||
import { TabEnum } from '../../index';
|
||||
import { useMyStep } from '@fastgpt/web/hooks/useStep';
|
||||
import dynamic from 'next/dynamic';
|
||||
import Provider from './Provider';
|
||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import Provider from './Provider';
|
||||
|
||||
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
|
||||
const FileLink = dynamic(() => import('./diffSource/FileLink'));
|
||||
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
|
||||
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
|
||||
|
||||
export enum ImportDataSourceEnum {
|
||||
fileLocal = 'fileLocal',
|
||||
fileLink = 'fileLink',
|
||||
fileCustom = 'fileCustom',
|
||||
|
||||
tableLocal = 'tableLocal'
|
||||
}
|
||||
|
||||
const ImportDataset = () => {
|
||||
const { t } = useTranslation();
|
||||
const router = useRouter();
|
||||
|
|
@ -65,7 +58,7 @@ const ImportDataset = () => {
|
|||
title: t('core.dataset.import.Upload data')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.tableLocal]: [
|
||||
[ImportDataSourceEnum.csvTable]: [
|
||||
{
|
||||
title: t('core.dataset.import.Select file')
|
||||
},
|
||||
|
|
@ -88,7 +81,7 @@ const ImportDataset = () => {
|
|||
if (source === ImportDataSourceEnum.fileLocal) return FileLocal;
|
||||
if (source === ImportDataSourceEnum.fileLink) return FileLink;
|
||||
if (source === ImportDataSourceEnum.fileCustom) return FileCustomText;
|
||||
if (source === ImportDataSourceEnum.tableLocal) return TableLocal;
|
||||
if (source === ImportDataSourceEnum.csvTable) return TableLocal;
|
||||
}, [source]);
|
||||
|
||||
return ImportComponent ? (
|
||||
|
|
@ -142,7 +135,7 @@ const ImportDataset = () => {
|
|||
<MyStep />
|
||||
</Box>
|
||||
</Box>
|
||||
<Provider dataset={datasetDetail} parentId={parentId}>
|
||||
<Provider dataset={datasetDetail} parentId={parentId} importSource={source}>
|
||||
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
|
||||
<ImportComponent activeStep={activeStep} goToNext={goToNext} />
|
||||
</Box>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,7 @@
|
|||
import { ImportSourceItemType } from '@/web/core/dataset/type';

export type UploadFileItemType = ImportSourceItemType & {
  file?: File;
  isUploading: boolean;
  uploadedFileRate: number;
};
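As a hedged illustration (not part of the commit), this is how the new UploadFileItemType might be used to track per-file upload progress; the import path and the state-update convention are assumptions based on the setSelectFiles patterns elsewhere in this diff.

```ts
import type { UploadFileItemType } from '@/web/core/dataset/type';

// Hypothetical helper: immutably update one file's upload progress in a list,
// mirroring the setSelectFiles((state) => ...) updates used in this diff.
export const setUploadRate = (
  files: UploadFileItemType[],
  id: string,
  uploadedFileRate: number
): UploadFileItemType[] =>
  files.map((item) =>
    item.id === id ? { ...item, uploadedFileRate, isUploading: uploadedFileRate < 100 } : item
  );
```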
|
||||
|
|
@ -1,19 +1,5 @@
|
|||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Textarea,
|
||||
Button,
|
||||
Flex,
|
||||
useTheme,
|
||||
useDisclosure,
|
||||
Table,
|
||||
Thead,
|
||||
Tbody,
|
||||
Tr,
|
||||
Th,
|
||||
Td,
|
||||
TableContainer
|
||||
} from '@chakra-ui/react';
|
||||
import { Box, Textarea, Button, Flex, useTheme, useDisclosure } from '@chakra-ui/react';
|
||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest';
|
||||
import { postSearchText } from '@/web/core/dataset/api';
|
||||
|
|
@ -36,10 +22,7 @@ import { useForm } from 'react-hook-form';
|
|||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||
import { fileDownload } from '@/web/common/file/utils';
|
||||
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import QuoteItem from '@/components/core/dataset/QuoteItem';
|
||||
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import SearchParamsTip from '@/components/core/dataset/SearchParamsTip';
|
||||
|
||||
|
|
@ -134,34 +117,6 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
|||
});
|
||||
}
|
||||
});
|
||||
// const { mutate: onFileTest, isLoading: fileTestIsLoading } = useRequest({
|
||||
// mutationFn: async ({ searchParams }: FormType) => {
|
||||
// if (!selectFile) return Promise.reject('File is not selected');
|
||||
// const { data } = await readCsvContent({ file: selectFile });
|
||||
// const testList = data.slice(0, 100);
|
||||
// const results: SearchTestResponse[] = [];
|
||||
|
||||
// for await (const item of testList) {
|
||||
// try {
|
||||
// const result = await postSearchText({ datasetId, text: item[0].trim(), ...searchParams });
|
||||
// results.push(result);
|
||||
// } catch (error) {
|
||||
// await delay(500);
|
||||
// }
|
||||
// }
|
||||
|
||||
// return results;
|
||||
// },
|
||||
// onSuccess(res: SearchTestResponse[]) {
|
||||
// console.log(res);
|
||||
// },
|
||||
// onError(err) {
|
||||
// toast({
|
||||
// title: getErrText(err),
|
||||
// status: 'error'
|
||||
// });
|
||||
// }
|
||||
// });
|
||||
|
||||
const onSelectFile = async (files: File[]) => {
|
||||
const file = files[0];
|
||||
|
|
|
|||
|
|
@ -101,7 +101,9 @@ const Standard = ({
|
|||
{t('support.wallet.subscription.Sub plan')}
|
||||
</Box>
|
||||
<Box mt={8} mb={10} color={'myGray.500'} fontSize={'lg'}>
|
||||
{t('support.wallet.subscription.Sub plan tip')}
|
||||
{t('support.wallet.subscription.Sub plan tip', {
|
||||
title: feConfigs?.systemTitle
|
||||
})}
|
||||
</Box>
|
||||
<Box>
|
||||
<RowTabs
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import { checkTeamAiPointsAndLock } from './utils';
|
|||
import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils';
|
||||
import { addMinutes } from 'date-fns';
|
||||
import { countGptMessagesTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
|
||||
|
||||
const reduceQueue = () => {
|
||||
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
|
||||
|
|
@ -128,7 +128,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
|
|||
});
|
||||
|
||||
// get vector and insert
|
||||
const { insertLen } = await pushDataListToTrainingQueue({
|
||||
const { insertLen } = await pushDataListToTrainingQueueByCollectionId({
|
||||
teamId: data.teamId,
|
||||
tmbId: data.tmbId,
|
||||
collectionId: data.collectionId,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
|
||||
import { GET, POST } from '@/web/common/api/request';
|
||||
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
|
||||
import { AxiosProgressEvent } from 'axios';
|
||||
|
||||
|
|
@ -8,10 +8,16 @@ export const postUploadFiles = (
|
|||
  data: FormData,
  onUploadProgress: (progressEvent: AxiosProgressEvent) => void
) =>
  POST<string[]>('/common/file/upload', data, {
  POST<string>('/common/file/upload', data, {
    timeout: 480000,
    onUploadProgress,
    headers: {
      'Content-Type': 'multipart/form-data; charset=utf-8'
    }
  });

export const getPreviewFileContent = (data: { fileId: string; csvFormat: boolean }) =>
  POST<{
    previewContent: string;
    totalLength: number;
  }>('/common/file/previewContent', data);
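For orientation, a minimal sketch of chaining the reworked upload endpoint with the new preview endpoint; the import path is assumed from the hunk context, and the bucketName/metadata form fields shown in uploadFile2DB are omitted for brevity.

```ts
import { postUploadFiles, getPreviewFileContent } from '@/web/common/file/api';

// Hypothetical flow: upload one file, then fetch a text preview of its content.
async function uploadAndPreview(file: File) {
  const form = new FormData();
  form.append('file', file, encodeURIComponent(file.name));

  // postUploadFiles now resolves to a single fileId string instead of string[].
  const fileId = await postUploadFiles(form, (e) => {
    if (e.total) console.log(`upload ${Math.round((e.loaded / e.total) * 100)}%`);
  });

  const { previewContent, totalLength } = await getPreviewFileContent({
    fileId,
    csvFormat: false
  });
  console.log(totalLength, previewContent.slice(0, 200));

  return fileId;
}
```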
|
||||
|
|
|
|||
|
|
@ -7,13 +7,13 @@ import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/fi
|
|||
/**
 * upload file to mongo gridfs
 */
export const uploadFiles = ({
  files,
export const uploadFile2DB = ({
  file,
  bucketName,
  metadata = {},
  percentListen
}: {
  files: File[];
  file: File;
  bucketName: `${BucketNameEnum}`;
  metadata?: Record<string, any>;
  percentListen?: (percent: number) => void;

@ -21,9 +21,7 @@ export const uploadFiles = ({
  const form = new FormData();
  form.append('metadata', JSON.stringify(metadata));
  form.append('bucketName', bucketName);
  files.forEach((file) => {
    form.append('file', file, encodeURIComponent(file.name));
  });
  form.append('file', file, encodeURIComponent(file.name));
  return postUploadFiles(form, (e) => {
    if (!e.total) return;
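Below is a hedged usage sketch of the renamed uploadFile2DB helper, based only on the signature visible in this hunk; the import path is an assumption, and BucketNameEnum.dataset is taken from the removed web controller later in this diff.

```ts
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';

// Hypothetical call: upload a single dataset file and log the reported percentage.
async function uploadDatasetFile(file: File, relatedId: string) {
  const fileId = await uploadFile2DB({
    file,
    bucketName: BucketNameEnum.dataset,
    metadata: { relatedId },
    percentListen: (percent) => console.log(`uploaded ${percent}%`)
  });
  return fileId; // resolves to the single fileId string returned by postUploadFiles
}
```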
|
||||
|
||||
|
|
|
|||
|
|
@ -23,14 +23,18 @@ export const useSelectFile = (props?: {
|
|||
accept={fileType}
|
||||
multiple={multiple}
|
||||
onChange={(e) => {
|
||||
if (!e.target.files || e.target.files?.length === 0) return;
|
||||
if (e.target.files.length > maxCount) {
|
||||
return toast({
|
||||
const files = e.target.files;
|
||||
if (!files || files?.length === 0) return;
|
||||
|
||||
let fileList = Array.from(files);
|
||||
if (fileList.length > maxCount) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Select file amount limit', { max: maxCount })
|
||||
});
|
||||
fileList = fileList.slice(0, maxCount);
|
||||
}
|
||||
onSelect(Array.from(e.target.files), openSign.current);
|
||||
onSelect(fileList, openSign.current);
|
||||
}}
|
||||
/>
|
||||
</Box>
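To show the effect of this change from the consumer side, here is a hedged sketch built from the useSelectFile usage visible elsewhere in this diff: over-selecting now warns and keeps the first maxCount files instead of rejecting the whole selection. The component and handler names are illustrative.

```tsx
import React from 'react';
import { Button } from '@chakra-ui/react';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';

// Hypothetical consumer: selecting more than 10 files now triggers a warning toast
// and passes the first 10 files to onSelect, rather than aborting.
const CsvPicker = ({ onFiles }: { onFiles: (files: File[]) => void }) => {
  const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: true, maxCount: 10 });

  return (
    <>
      <Button onClick={onOpen}>Select CSV files</Button>
      <File onSelect={(files) => onFiles(files)} />
    </>
  );
};

export default CsvPicker;
```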
|
||||
|
|
|
|||
|
|
@ -77,15 +77,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
|
|||
let options = {};
|
||||
if (MediaRecorder.isTypeSupported('audio/webm')) {
|
||||
options = { type: 'audio/webm' };
|
||||
} else if (MediaRecorder.isTypeSupported('video/mp4')) {
|
||||
options = { type: 'video/mp4' };
|
||||
} else if (MediaRecorder.isTypeSupported('video/mp3')) {
|
||||
options = { type: 'video/mp3' };
|
||||
} else {
|
||||
console.error('no suitable mimetype found for this device');
|
||||
}
|
||||
const blob = new Blob(chunks, options);
|
||||
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
|
||||
|
||||
formData.append('file', blob, 'recording.mp4');
|
||||
formData.append('file', blob, 'recording.mp3');
|
||||
formData.append(
|
||||
'data',
|
||||
JSON.stringify({
|
||||
|
|
|
|||
|
|
@ -8,13 +8,19 @@ import type {
|
|||
} from '@/global/core/api/datasetReq.d';
|
||||
import type {
|
||||
CreateDatasetCollectionParams,
|
||||
CsvTableCreateDatasetCollectionParams,
|
||||
DatasetUpdateBody,
|
||||
FileIdCreateDatasetCollectionParams,
|
||||
LinkCreateDatasetCollectionParams,
|
||||
PostWebsiteSyncParams
|
||||
PostWebsiteSyncParams,
|
||||
TextCreateDatasetCollectionParams
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import type {
|
||||
GetTrainingQueueProps,
|
||||
GetTrainingQueueResponse,
|
||||
PostPreviewFilesChunksProps,
|
||||
PostPreviewFilesChunksResponse,
|
||||
PostPreviewTableChunksResponse,
|
||||
SearchTestProps,
|
||||
SearchTestResponse
|
||||
} from '@/global/core/dataset/api.d';
|
||||
|
|
@ -23,10 +29,7 @@ import type {
|
|||
CreateDatasetParams,
|
||||
InsertOneDatasetDataProps
|
||||
} from '@/global/core/dataset/api.d';
|
||||
import type {
|
||||
PushDatasetDataProps,
|
||||
PushDatasetDataResponse
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
|
||||
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
DatasetCollectionSyncResultEnum,
|
||||
|
|
@ -75,8 +78,14 @@ export const getDatasetCollectionById = (id: string) =>
|
|||
  GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
  POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data);
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data);

export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
  POST(`/core/dataset/collection/update`, data);
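The payload shapes for these new per-source create endpoints are not visible in this diff, so the sketch below is speculative: it assumes FileIdCreateDatasetCollectionParams and LinkCreateDatasetCollectionParams carry at least a datasetId plus the fileId/link, and casts to any to acknowledge that more fields are likely required.

```ts
import {
  postCreateDatasetFileCollection,
  postCreateDatasetLinkCollection
} from '@/web/core/dataset/api';

// Hypothetical calls: field names beyond datasetId/fileId/link are guesses.
async function createCollections(datasetId: string, fileId: string, link: string) {
  const { collectionId: fileCollectionId } = await postCreateDatasetFileCollection({
    datasetId,
    fileId
  } as any);

  const { collectionId: linkCollectionId } = await postCreateDatasetLinkCollection({
    datasetId,
    link
  } as any);

  return { fileCollectionId, linkCollectionId };
}
```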
|
||||
|
|
@ -95,12 +104,6 @@ export const getDatasetDataList = (data: GetDatasetDataListProps) =>
|
|||
export const getDatasetDataItemById = (id: string) =>
|
||||
GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id });
|
||||
|
||||
/**
|
||||
* push data to training queue
|
||||
*/
|
||||
export const postChunks2Dataset = (data: PushDatasetDataProps) =>
|
||||
POST<PushDatasetDataResponse>(`/core/dataset/data/pushData`, data);
|
||||
|
||||
/**
|
||||
* insert one data to dataset (immediately insert)
|
||||
*/
|
||||
|
|
@ -122,6 +125,8 @@ export const delOneDatasetDataById = (id: string) =>
|
|||
/* get length of system training queue */
|
||||
export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
|
||||
GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
|
||||
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
|
||||
POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);
|
||||
|
||||
/* ================== file ======================== */
|
||||
export const getFileViewUrl = (fileId: string) =>
|
||||
|
|
|
|||
|
|
@ -1,200 +0,0 @@
|
|||
import MyBox from '@/components/common/MyBox';
|
||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { Box, FlexProps } from '@chakra-ui/react';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import React, { DragEvent, useCallback, useState } from 'react';
|
||||
|
||||
export type SelectFileItemType = {
|
||||
folderPath: string;
|
||||
file: File;
|
||||
};
|
||||
|
||||
const FileSelector = ({
|
||||
fileType,
|
||||
multiple,
|
||||
maxCount,
|
||||
maxSize,
|
||||
isLoading,
|
||||
onSelectFile,
|
||||
...props
|
||||
}: {
|
||||
fileType: string;
|
||||
multiple?: boolean;
|
||||
maxCount?: number;
|
||||
maxSize?: number;
|
||||
isLoading?: boolean;
|
||||
onSelectFile: (e: SelectFileItemType[]) => any;
|
||||
} & FlexProps) => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const { File, onOpen } = useSelectFile({
|
||||
fileType,
|
||||
multiple,
|
||||
maxCount
|
||||
});
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
|
||||
const filterTypeReg = new RegExp(
|
||||
`(${fileType
|
||||
.split(',')
|
||||
.map((item) => item.trim())
|
||||
.join('|')})$`,
|
||||
'i'
|
||||
);
|
||||
|
||||
const selectFileCallback = useCallback(
|
||||
(files: SelectFileItemType[]) => {
|
||||
// size check
|
||||
if (!maxSize) {
|
||||
return onSelectFile(files);
|
||||
}
|
||||
const filterFiles = files.filter((item) => item.file.size <= maxSize);
|
||||
|
||||
if (filterFiles.length < files.length) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
|
||||
});
|
||||
}
|
||||
|
||||
return onSelectFile(filterFiles);
|
||||
},
|
||||
[maxSize, onSelectFile, t, toast]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(true);
|
||||
};
|
||||
|
||||
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
};
|
||||
|
||||
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
|
||||
const items = e.dataTransfer.items;
|
||||
const fileList: SelectFileItemType[] = [];
|
||||
|
||||
if (e.dataTransfer.items.length <= 1) {
|
||||
const traverseFileTree = async (item: any) => {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
if (item.isFile) {
|
||||
item.file((file: File) => {
|
||||
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
|
||||
|
||||
if (filterTypeReg.test(file.name)) {
|
||||
fileList.push({
|
||||
folderPath,
|
||||
file
|
||||
});
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
} else if (item.isDirectory) {
|
||||
const dirReader = item.createReader();
|
||||
dirReader.readEntries(async (entries: any[]) => {
|
||||
for (let i = 0; i < entries.length; i++) {
|
||||
await traverseFileTree(entries[i]);
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
for await (const item of items) {
|
||||
await traverseFileTree(item.webkitGetAsEntry());
|
||||
}
|
||||
} else {
|
||||
const files = Array.from(e.dataTransfer.files);
|
||||
let isErr = files.some((item) => item.type === '');
|
||||
if (isErr) {
|
||||
return toast({
|
||||
title: t('file.upload error description'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
|
||||
fileList.push(
|
||||
...files
|
||||
.filter((item) => filterTypeReg.test(item.name))
|
||||
.map((file) => ({
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
selectFileCallback(fileList.slice(0, maxCount));
|
||||
};
|
||||
|
||||
return (
|
||||
<MyBox
|
||||
isLoading={isLoading}
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
px={3}
|
||||
py={[4, 7]}
|
||||
borderWidth={'1.5px'}
|
||||
borderStyle={'dashed'}
|
||||
borderRadius={'md'}
|
||||
cursor={'pointer'}
|
||||
_hover={{
|
||||
bg: 'primary.50',
|
||||
borderColor: 'primary.600'
|
||||
}}
|
||||
{...(isDragging
|
||||
? {
|
||||
borderColor: 'primary.600'
|
||||
}
|
||||
: {
|
||||
borderColor: 'borderColor.high'
|
||||
})}
|
||||
{...props}
|
||||
onDragEnter={handleDragEnter}
|
||||
onDragOver={(e) => e.preventDefault()}
|
||||
onDragLeave={handleDragLeave}
|
||||
onDrop={handleDrop}
|
||||
onClick={onOpen}
|
||||
>
|
||||
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
|
||||
<Box fontWeight={'bold'}>
|
||||
{isDragging
|
||||
? t('file.Release the mouse to upload the file')
|
||||
: t('common.file.Select and drag file tip')}
|
||||
</Box>
|
||||
{/* file type */}
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{t('common.file.Support file type', { fileType })}
|
||||
</Box>
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{/* max count */}
|
||||
{maxCount && t('common.file.Support max count', { maxCount })}
|
||||
{/* max size */}
|
||||
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
|
||||
</Box>
|
||||
|
||||
<File
|
||||
onSelect={(files) =>
|
||||
selectFileCallback(
|
||||
files.map((file) => ({
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
)
|
||||
}
|
||||
/>
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(FileSelector);
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ImportProcessWayEnum } from './constants';
|
||||
import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
|
||||
import { UseFormReturn } from 'react-hook-form';
|
||||
|
||||
export type ImportDataComponentProps = {
|
||||
|
|
@ -10,19 +10,27 @@ export type ImportDataComponentProps = {
|
|||
|
||||
export type ImportSourceItemType = {
  id: string;
  rawText: string;
  chunks: PushDatasetDataChunkProps[];
  chunkChars: number;
  sourceFolderPath?: string;
  sourceName: string;
  sourceSize?: string;
  icon: string;

  createStatus: 'waiting' | 'creating' | 'finish';
  metadata?: Record<string, any>;
  errorMsg?: string;

  // source
  sourceName: string;
  sourceSize?: string;
  icon: string;

  // file
  isUploading?: boolean;
  uploadedFileRate?: number;
  dbFileId?: string; // file id stored in the database; this id is also used as the relatedId in the image and collection metadata
  file?: File;

  // link
  link?: string;

  // custom text
  rawText?: string;
};
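To make the reshaped type concrete, here is a hedged example of what a freshly selected local-file item might look like before upload; it assumes the id/chunks/chunkChars fields at the top of the type remain in the new shape, and the helper imports are taken from elsewhere in this diff.

```ts
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import type { ImportSourceItemType } from '@/web/core/dataset/type.d';

// Hypothetical factory: build an import item for a file the user just picked.
const newFileItem = (file: File): ImportSourceItemType => ({
  id: getNanoid(32),
  createStatus: 'waiting',
  chunks: [],
  chunkChars: 0,
  sourceName: file.name,
  sourceSize: formatFileSize(file.size),
  icon: getFileIcon(file.name),
  isUploading: false,
  uploadedFileRate: 0,
  file
});
```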
|
||||
|
||||
export type ImportSourceParamsType = UseFormReturn<
|
||||
|
|
|
|||
|
|
@ -1,95 +1,5 @@
|
|||
import { getFileViewUrl, postChunks2Dataset } from '@/web/core/dataset/api';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import { getFileViewUrl } from '@/web/core/dataset/api';
|
||||
import { strIsLink } from '@fastgpt/global/common/string/tools';
|
||||
import type {
|
||||
FileCreateDatasetCollectionParams,
|
||||
PushDatasetDataChunkProps
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { POST } from '@/web/common/api/request';
|
||||
|
||||
/* upload a file to create collection */
|
||||
export const fileCollectionCreate = ({
|
||||
file,
|
||||
metadata = {},
|
||||
data,
|
||||
percentListen
|
||||
}: {
|
||||
file: File;
|
||||
metadata?: Record<string, any>;
|
||||
data: FileCreateDatasetCollectionParams;
|
||||
percentListen: (percent: number) => void;
|
||||
}) => {
|
||||
const form = new FormData();
|
||||
form.append('data', JSON.stringify(data));
|
||||
form.append('metadata', JSON.stringify(metadata));
|
||||
form.append('bucketName', BucketNameEnum.dataset);
|
||||
form.append('file', file, encodeURIComponent(file.name));
|
||||
|
||||
return POST<string>(`/core/dataset/collection/create/file?datasetId=${data.datasetId}`, form, {
|
||||
timeout: 480000,
|
||||
onUploadProgress: (e) => {
|
||||
if (!e.total) return;
|
||||
|
||||
const percent = Math.round((e.loaded / e.total) * 100);
|
||||
percentListen && percentListen(percent);
|
||||
},
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data; charset=utf-8'
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
export async function chunksUpload({
|
||||
billId,
|
||||
collectionId,
|
||||
trainingMode,
|
||||
chunks,
|
||||
prompt,
|
||||
rate = 50,
|
||||
onUploading
|
||||
}: {
|
||||
billId: string;
|
||||
collectionId: string;
|
||||
trainingMode: `${TrainingModeEnum}`;
|
||||
chunks: PushDatasetDataChunkProps[];
|
||||
prompt?: string;
|
||||
rate?: number;
|
||||
onUploading?: (rate: number) => void;
|
||||
}) {
|
||||
async function upload(data: PushDatasetDataChunkProps[]) {
|
||||
return postChunks2Dataset({
|
||||
collectionId,
|
||||
trainingMode,
|
||||
data,
|
||||
prompt,
|
||||
billId
|
||||
});
|
||||
}
|
||||
|
||||
let successInsert = 0;
|
||||
let retryTimes = 10;
|
||||
for (let i = 0; i < chunks.length; i += rate) {
|
||||
try {
|
||||
const uploadChunks = chunks.slice(i, i + rate);
|
||||
const { insertLen } = await upload(uploadChunks);
|
||||
if (onUploading) {
|
||||
onUploading(Math.round(((i + uploadChunks.length) / chunks.length) * 100));
|
||||
}
|
||||
successInsert += insertLen;
|
||||
} catch (error) {
|
||||
if (retryTimes === 0) {
|
||||
return Promise.reject(error);
|
||||
}
|
||||
await delay(1000);
|
||||
retryTimes--;
|
||||
i -= rate;
|
||||
}
|
||||
}
|
||||
|
||||
return { insertLen: successInsert };
|
||||
}
|
||||
|
||||
export async function getFileAndOpen(fileId: string) {
|
||||
if (strIsLink(fileId)) {
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
|
|||
# please download the model from https://huggingface.co/BAAI/bge-reranker-base and put it in the same directory as Dockerfile
COPY ./bge-reranker-base ./bge-reranker-base

COPY requirement.txt .
COPY requirements.txt .

RUN python3 -m pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN python3 -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

COPY app.py Dockerfile .
|
||||
|
||||
|
|
|
|||