FastGPT/test/cases/service/core/dataset/textSplitter.test.ts
Archer 2ed1545eb5
V4.12.4 features (#5626)
* fix: push again, user select option button and form input radio content overflow (#5601)

* fix: push again, user select option button and form input radio content overflow

* fix: use useCallback instead of useMemo, fix unnecessary delete

* fix: Move the variable inside the component

* fix: do not pass valueLabel to MySelect

* ui

* del collection api adapt

* refactor: inherit permission (#5529)

* refactor: permission update conflict check function

* refactor(permission): app collaborator update api

* refactor(permission): support app update collaborator

* feat: support fe permission conflict check

* refactor(permission): app permission

* refactor(permission): dataset permission

* refactor(permission): team permission

* chore: fe adjust

* fix: type error

* fix: audit pagination

* fix: tc

* chore: initv4130

* fix: app/dataset auth logic

* chore: move code

* refactor(permission): remove selfPermission

* fix: mock

* fix: test

* fix: app & dataset auth

* fix: inherit

* test(inheritPermission): test syncChildrenPermission

* prompt editor add list plugin (#5620)

* perf: search result (#5608)

* fix: table size (#5598)

* temp: list value

* backspace

* optimize code

---------

Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>

* fix: fe & member list (#5619)

* chore: initv4130

* fix: MemberItemCard

* fix: MemberItemCard

* chore: fe adjust & init script

* perf: test code

* doc

* fix debug variables (#5617)

* perf: search result (#5608)

* fix: table size (#5598)

* fix debug variables

* fix

---------

Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>

* perf: member ui

* fix: inherit bug (#5624)

* refactor(permission): remove getClbsWithInfo, which is useless

* fix: app list privateApp

* fix: get infos

* perf(fe): remove delete icon when it is disabled in MemberItemCard

* fix: dataset private dataset

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* perf: auto coupon

* chore: upgrade script & get infos avatar  (#5625)

* fix: get infos

* chore: initv4130

* feat: support WecomRobot publish, and fix AesKey can not save bug (#5526)

* feat: resolve conflicts

* fix: add param 'show_publish_wecom'

* feat: abstract out WecomCrypto type

* doc: wecom robot document

* fix: solve instability in AI output

* doc: update some pictures

* feat: remove functions from request.ts to chat.ts and toolCall.ts

* doc: wecom robot doc update

* fix

* delete unused code

* doc: update version and prompt

* feat: remove wecom crypto, delete wecom code in workflow

* feat: delete unused codes

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* remove test

* rename init shell

* feat: collection page store

* reload sandbox

* pysandbox

* remove log

* chore: remove useless code (#5629)

* chore: remove useless code

* fix: checkConflict

* perf: support hidden type for RoleList

* fix: copy node

* update doc

* fix(permission): some bug (#5632)

* fix: app/dataset list

* fix: inherit bug

* perf: del app;i18n;save chat

* fix: test

* i18n

* fix: sumper overflow return OwnerRoleVal (#5633)

* remove invalid code

* fix: scroll

* fix: objectId

* update next

* update package

* object id

* mock redis

* feat: add redis append to resolve wecom stream response  (#5643)

* feat: resolve conflicts

* fix: add param 'show_publish_wecom'

* feat: abstract out WecomCrypto type

* doc: wecom robot document

* fix: solve instability in AI output

* doc: update some pictures

* feat: remove functions from request.ts to chat.ts and toolCall.ts

* doc: wecom robot doc update

* fix

* delete unused code

* doc: update version and prompt

* feat: remove wecom crypto, delete wecom code in workflow

* feat: delete unused codes

* feat: add redis append method

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* cache per

* fix(test): init team sub when creating mocked user (#5646)

* fix: button is not vertically centered (#5647)

* doc

* fix: gridFs objectId (#5649)

---------

Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
2025-09-15 20:02:54 +08:00

381 lines
5.3 KiB
TypeScript

import { it, expect } from 'vitest'; // 必须显式导入
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { ChunkTriggerConfigTypeEnum } from '@fastgpt/global/core/dataset/constants';
/**
 * Reduces chunk objects to a whitespace-insensitive form for comparison.
 *
 * Only the `q` field of every chunk is kept, and every run of whitespace
 * (spaces, tabs, line breaks) is removed from it.
 *
 * @param chunks - Chunk objects; `a` and `indexes` are ignored.
 * @returns One whitespace-free string per input chunk, in the same order.
 */
const formatChunks = (
  chunks: {
    q: string;
    a: string;
    indexes?: string[];
  }[]
) => {
  const whitespaceRun = /\s+/g;
  return chunks.map(({ q }) => q.replace(whitespaceRun, ''));
};
/**
 * Strips all whitespace from every expected string so that it can be
 * compared against chunk text without caring about spacing or line breaks.
 *
 * @param result - Expected chunk texts.
 * @returns The same strings, in order, with every whitespace run removed.
 */
const formatResult = (result: string[]) => {
  // Splitting on whitespace runs and re-joining drops every whitespace character.
  const stripWhitespace = (item: string) => item.split(/\s+/).join('');
  return result.map((item) => stripWhitespace(item));
};
// maxSize trigger with maxSize: 20000 — the whole document is expected back as one chunk.
it(`Test splitText2Chunks 1`, async () => {
  // Note: this input ends with a trailing line break; the expected text does not.
  const rawText = `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd
`;
  const expected = [
    `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`
  ];

  const chunks = await rawText2Chunks({
    rawText,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.maxSize,
    chunkTriggerMinSize: 1000,
    maxSize: 20000,
    chunkSize: 512,
    backupParse: false
  });

  // Both sides are compared with all whitespace removed.
  expect(formatChunks(chunks)).toEqual(formatResult(expected));
});
// maxSize trigger with maxSize: 10 — one chunk is expected per heading section,
// each carrying the headings of its ancestors.
it(`Test splitText2Chunks 2`, async () => {
  const rawText = `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`;
  const expected = [
    `# A
af da da fda a a`,
    `# A
## B
阿凡撒发生的都是发大水`,
    `# A
## B
### c
dsgsgfsgs22`,
    `# A
## B
### c
#### D
dsgsgfsgs22`,
    `# A
## B
### c
#### D
##### E
dsgsgfsgs22sddddddd`
  ];

  const chunks = await rawText2Chunks({
    rawText,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.maxSize,
    chunkTriggerMinSize: 10,
    maxSize: 10,
    chunkSize: 512,
    backupParse: false
  });

  // Both sides are compared with all whitespace removed.
  expect(formatChunks(chunks)).toEqual(formatResult(expected));
});
// minSize trigger with chunkTriggerMinSize: 1000 — the whole document is expected back as one chunk.
it(`Test splitText2Chunks 3`, async () => {
  const rawText = `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`;
  const expected = [
    `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`
  ];

  const chunks = await rawText2Chunks({
    rawText,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.minSize,
    chunkTriggerMinSize: 1000,
    maxSize: 1000,
    chunkSize: 512,
    backupParse: false
  });

  // Both sides are compared with all whitespace removed.
  expect(formatChunks(chunks)).toEqual(formatResult(expected));
});
// minSize trigger with chunkTriggerMinSize: 10 — one chunk is expected per heading section,
// each carrying the headings of its ancestors.
it(`Test splitText2Chunks 4`, async () => {
  const rawText = `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`;
  const expected = [
    `# A
af da da fda a a`,
    `# A
## B
阿凡撒发生的都是发大水`,
    `# A
## B
### c
dsgsgfsgs22`,
    `# A
## B
### c
#### D
dsgsgfsgs22`,
    `# A
## B
### c
#### D
##### E
dsgsgfsgs22sddddddd`
  ];

  const chunks = await rawText2Chunks({
    rawText,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.minSize,
    chunkTriggerMinSize: 10,
    maxSize: 10,
    chunkSize: 512,
    backupParse: false
  });

  // Both sides are compared with all whitespace removed.
  expect(formatChunks(chunks)).toEqual(formatResult(expected));
});
// forceChunk trigger with chunkTriggerMinSize: 1000 and maxSize: 10000 — one chunk is
// still expected per heading section, each carrying the headings of its ancestors.
it(`Test splitText2Chunks 5`, async () => {
  const rawText = `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`;
  const expected = [
    `# A
af da da fda a a`,
    `# A
## B
阿凡撒发生的都是发大水`,
    `# A
## B
### c
dsgsgfsgs22`,
    `# A
## B
### c
#### D
dsgsgfsgs22`,
    `# A
## B
### c
#### D
##### E
dsgsgfsgs22sddddddd`
  ];

  const chunks = await rawText2Chunks({
    rawText,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.forceChunk,
    chunkTriggerMinSize: 1000,
    maxSize: 10000,
    chunkSize: 512,
    backupParse: false
  });

  // Both sides are compared with all whitespace removed.
  expect(formatChunks(chunks)).toEqual(formatResult(expected));
});
// forceChunk trigger with chunkTriggerMinSize: 10 — one chunk is expected per heading
// section, each carrying the headings of its ancestors.
it(`Test splitText2Chunks 6`, async () => {
  const rawText = `# A
af da da fda a a
## B
阿凡撒发生的都是发大水
### c
dsgsgfsgs22
#### D
dsgsgfsgs22
##### E
dsgsgfsgs22sddddddd`;
  const expected = [
    `# A
af da da fda a a`,
    `# A
## B
阿凡撒发生的都是发大水`,
    `# A
## B
### c
dsgsgfsgs22`,
    `# A
## B
### c
#### D
dsgsgfsgs22`,
    `# A
## B
### c
#### D
##### E
dsgsgfsgs22sddddddd`
  ];

  const chunks = await rawText2Chunks({
    rawText,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.forceChunk,
    chunkTriggerMinSize: 10,
    maxSize: 10000,
    chunkSize: 512,
    backupParse: false
  });

  // Both sides are compared with all whitespace removed.
  expect(formatChunks(chunks)).toEqual(formatResult(expected));
});