diff --git a/docSite/assets/imgs/template/Question-answer.png b/docSite/assets/imgs/template/Question-answer.png new file mode 100644 index 000000000..a7599d7a8 Binary files /dev/null and b/docSite/assets/imgs/template/Question-answer.png differ diff --git a/docSite/assets/imgs/template/Question-answer_data.png b/docSite/assets/imgs/template/Question-answer_data.png new file mode 100644 index 000000000..0d7e88b5a Binary files /dev/null and b/docSite/assets/imgs/template/Question-answer_data.png differ diff --git a/docSite/assets/imgs/template/box.png b/docSite/assets/imgs/template/box.png new file mode 100644 index 000000000..8ee2dfa7e Binary files /dev/null and b/docSite/assets/imgs/template/box.png differ diff --git a/docSite/assets/imgs/template/import.png b/docSite/assets/imgs/template/import.png new file mode 100644 index 000000000..cfd873a00 Binary files /dev/null and b/docSite/assets/imgs/template/import.png differ diff --git a/docSite/assets/imgs/template/import_csv.png b/docSite/assets/imgs/template/import_csv.png new file mode 100644 index 000000000..b0f2f4cf3 Binary files /dev/null and b/docSite/assets/imgs/template/import_csv.png differ diff --git a/docSite/assets/imgs/template/nomal.png b/docSite/assets/imgs/template/nomal.png new file mode 100644 index 000000000..b318f6579 Binary files /dev/null and b/docSite/assets/imgs/template/nomal.png differ diff --git a/docSite/assets/imgs/template/nomal_data.png b/docSite/assets/imgs/template/nomal_data.png new file mode 100644 index 000000000..f988486b3 Binary files /dev/null and b/docSite/assets/imgs/template/nomal_data.png differ diff --git a/docSite/assets/imgs/thirddataset-1.png b/docSite/assets/imgs/thirddataset-1.png new file mode 100644 index 000000000..18ff55e38 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-1.png differ diff --git a/docSite/assets/imgs/thirddataset-10.png b/docSite/assets/imgs/thirddataset-10.png new file mode 100644 index 000000000..cf236427d Binary files /dev/null and b/docSite/assets/imgs/thirddataset-10.png differ diff --git a/docSite/assets/imgs/thirddataset-11.png b/docSite/assets/imgs/thirddataset-11.png new file mode 100644 index 000000000..6762c1c9e Binary files /dev/null and b/docSite/assets/imgs/thirddataset-11.png differ diff --git a/docSite/assets/imgs/thirddataset-12.png b/docSite/assets/imgs/thirddataset-12.png new file mode 100644 index 000000000..802b3128d Binary files /dev/null and b/docSite/assets/imgs/thirddataset-12.png differ diff --git a/docSite/assets/imgs/thirddataset-13.png b/docSite/assets/imgs/thirddataset-13.png new file mode 100644 index 000000000..30b0e18f2 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-13.png differ diff --git a/docSite/assets/imgs/thirddataset-14.png b/docSite/assets/imgs/thirddataset-14.png new file mode 100644 index 000000000..99d54bf3a Binary files /dev/null and b/docSite/assets/imgs/thirddataset-14.png differ diff --git a/docSite/assets/imgs/thirddataset-15.png b/docSite/assets/imgs/thirddataset-15.png new file mode 100644 index 000000000..a2eb73e28 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-15.png differ diff --git a/docSite/assets/imgs/thirddataset-16.png b/docSite/assets/imgs/thirddataset-16.png new file mode 100644 index 000000000..b3eac8d54 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-16.png differ diff --git a/docSite/assets/imgs/thirddataset-17.png b/docSite/assets/imgs/thirddataset-17.png new file mode 100644 index 000000000..7d8f40411 Binary files /dev/null and 
b/docSite/assets/imgs/thirddataset-17.png differ diff --git a/docSite/assets/imgs/thirddataset-18.png b/docSite/assets/imgs/thirddataset-18.png new file mode 100644 index 000000000..8e415e651 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-18.png differ diff --git a/docSite/assets/imgs/thirddataset-19.png b/docSite/assets/imgs/thirddataset-19.png new file mode 100644 index 000000000..0c420a382 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-19.png differ diff --git a/docSite/assets/imgs/thirddataset-2.png b/docSite/assets/imgs/thirddataset-2.png new file mode 100644 index 000000000..bfc6e3eed Binary files /dev/null and b/docSite/assets/imgs/thirddataset-2.png differ diff --git a/docSite/assets/imgs/thirddataset-20.png b/docSite/assets/imgs/thirddataset-20.png new file mode 100644 index 000000000..89d6f4e00 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-20.png differ diff --git a/docSite/assets/imgs/thirddataset-21.png b/docSite/assets/imgs/thirddataset-21.png new file mode 100644 index 000000000..2d8f02086 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-21.png differ diff --git a/docSite/assets/imgs/thirddataset-3.png b/docSite/assets/imgs/thirddataset-3.png new file mode 100644 index 000000000..9bfc676d3 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-3.png differ diff --git a/docSite/assets/imgs/thirddataset-4.png b/docSite/assets/imgs/thirddataset-4.png new file mode 100644 index 000000000..2a171a6c8 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-4.png differ diff --git a/docSite/assets/imgs/thirddataset-5.png b/docSite/assets/imgs/thirddataset-5.png new file mode 100644 index 000000000..29c91a125 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-5.png differ diff --git a/docSite/assets/imgs/thirddataset-6.png b/docSite/assets/imgs/thirddataset-6.png new file mode 100644 index 000000000..63795251a Binary files /dev/null and b/docSite/assets/imgs/thirddataset-6.png differ diff --git a/docSite/assets/imgs/thirddataset-7.png b/docSite/assets/imgs/thirddataset-7.png new file mode 100644 index 000000000..f0e85db70 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-7.png differ diff --git a/docSite/assets/imgs/thirddataset-8.png b/docSite/assets/imgs/thirddataset-8.png new file mode 100644 index 000000000..89c955943 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-8.png differ diff --git a/docSite/assets/imgs/thirddataset-9.png b/docSite/assets/imgs/thirddataset-9.png new file mode 100644 index 000000000..c2ad9b232 Binary files /dev/null and b/docSite/assets/imgs/thirddataset-9.png differ diff --git a/docSite/content/zh-cn/docs/development/openapi/dataset.md b/docSite/content/zh-cn/docs/development/openapi/dataset.md index 934617ebf..a22f43338 100644 --- a/docSite/content/zh-cn/docs/development/openapi/dataset.md +++ b/docSite/content/zh-cn/docs/development/openapi/dataset.md @@ -295,12 +295,15 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete? 
| --- | --- | --- |
| datasetId | Dataset ID | ✅ |
| parentId | Parent ID; defaults to the root directory if omitted | |
-| trainingType | Data processing mode. chunk: split by text length; qa: Q&A pair extraction | ✅ |
| customPdfParse | Enhanced PDF parsing. true: enable enhanced PDF parsing; defaults to false if omitted | |
+| trainingType | Data processing mode. chunk: split by text length; qa: Q&A pair extraction | ✅ |
+| chunkTriggerType | Chunking trigger logic. minSize (default): chunk when the text is longer than n; maxSize: chunk when the text is shorter than the file-processing model's max context; forceChunk: always chunk | |
+| chunkTriggerMinSize | Set when chunkTriggerType=minSize; chunk when the source text is longer than this value (default 1000) | |
| autoIndexes | Automatically generate indexes (commercial edition only) | |
| imageIndex | Automatically generate image indexes (commercial edition only) | |
| chunkSettingMode | Chunking parameter mode. auto: system default parameters; custom: manually specified parameters | |
-| chunkSplitMode | Chunk split mode. size: split by length; char: split by character. Ignored when chunkSettingMode=auto. | |
+| chunkSplitMode | Chunk split mode. paragraph: paragraphs first, then by length; size: split by length; char: split by character. Ignored when chunkSettingMode=auto. | |
+| paragraphChunkDeep | Maximum paragraph depth (default 5) | |
| chunkSize | Chunk size, default 1500. Ignored when chunkSettingMode=auto. | |
| indexSize | Index size, default 512; must be smaller than the index model's max tokens. Ignored when chunkSettingMode=auto. | |
| chunkSplitter | Custom highest-priority split token; no further splitting is done unless the file-processing max context is exceeded. Ignored when chunkSettingMode=auto. | |
@@ -428,10 +431,7 @@ data is the collection ID.
   "data": {
     "collectionId": "65abcfab9d1448617cba5f0d",
     "results": {
-      "insertLen": 5, // number of chunks produced
-      "overToken": [],
-      "repeat": [],
-      "error": []
+      "insertLen": 5 // number of chunks produced
     }
   }
 }
@@ -497,10 +497,7 @@ data is the collection ID.
   "data": {
     "collectionId": "65abd0ad9d1448617cba6031",
     "results": {
-      "insertLen": 1,
-      "overToken": [],
-      "repeat": [],
-      "error": []
+      "insertLen": 1
     }
   }
 }
@@ -546,7 +543,7 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
 {{< tab tabName="Response example" >}}
 {{< markdownify >}}
 
-data is the collection ID.
+Because document parsing is asynchronous, the number of inserted chunks is not returned here.
 
 ```json
 {
@@ -556,10 +553,7 @@ data is the collection ID.
   "data": {
     "collectionId": "65abc044e4704bac793fbd81",
     "results": {
-      "insertLen": 1,
-      "overToken": [],
-      "repeat": [],
-      "error": []
+      "insertLen": 0
     }
   }
 }
@@ -632,10 +626,7 @@ data is the collection ID.
   "data": {
     "collectionId": "65abc044e4704bac793fbd81",
     "results": {
-      "insertLen": 1,
-      "overToken": [],
-      "repeat": [],
-      "error": []
+      "insertLen": 1
     }
   }
 }
@@ -702,10 +693,7 @@ data is the collection ID.
   "data": {
     "collectionId": "6646fcedfabd823cdc6de746",
     "results": {
-      "insertLen": 1,
-      "overToken": [],
-      "repeat": [],
-      "error": []
+      "insertLen": 1
    }
   }
 }
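The new chunking parameters above plug into the existing collection-create endpoints. Below is a minimal TypeScript sketch of a text-collection request that combines them; the endpoint, Bearer auth, and field names follow the curl examples elsewhere in this document, while `{{apikey}}`, `{{datasetId}}`, and the exact parameter set are placeholders to verify against your FastGPT version.

```ts
// Sketch: create a text collection with the new chunking parameters.
const createTextCollection = async () => {
  const res = await fetch('http://localhost:3000/api/core/dataset/collection/create/text', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer {{apikey}}',
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      datasetId: '{{datasetId}}',
      name: 'chunk-params demo',
      text: 'raw text to import...',
      trainingType: 'chunk',
      chunkTriggerType: 'minSize', // chunk only when the text is longer than...
      chunkTriggerMinSize: 1000, // ...this many characters (default 1000)
      chunkSettingMode: 'custom', // required for the manual parameters below to take effect
      chunkSplitMode: 'paragraph', // paragraphs first, then fall back to length
      paragraphChunkDeep: 5, // maximum paragraph depth (default 5)
      chunkSize: 1500,
      indexSize: 512
    })
  });
  const { data } = await res.json();
  return data.collectionId; // synchronous chunk counts come back in data.results.insertLen
};
```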
diff --git a/docSite/content/zh-cn/docs/development/upgrading/491.md b/docSite/content/zh-cn/docs/development/upgrading/491.md
index 8d7fd52c6..f3ef2cb39 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/491.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/491.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.1'
+title: 'V4.9.1 (includes an upgrade script)'
 description: 'FastGPT V4.9.1 release notes'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/development/upgrading/4911.md b/docSite/content/zh-cn/docs/development/upgrading/4911.md
index 9c9410b17..ffba53c9e 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/4911.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/4911.md
@@ -7,11 +7,29 @@ toc: true
 weight: 789
 ---
 
+## Run the upgrade script
+
+Only commercial edition users need to run this script.
+
+From any terminal, send one HTTP request. Replace {{rootkey}} with the `rootkey` from the environment variables, and {{host}} with the **FastGPT domain**.
+
+```bash
+curl --location --request POST 'https://{{host}}/api/admin/initv4911' \
+--header 'rootkey: {{rootkey}}' \
+--header 'Content-Type: application/json'
+```
+
+**What the script does**
+
+1. Migrates third-party dataset API configurations.
+
 ## 🚀 New features
 
-1. Node search in workflows.
-2. Sub-workflow version control in workflows: "keep latest version" can be selected, removing the need for manual updates.
+1. Image datasets for the commercial edition.
+2. Node search in workflows.
+3. Sub-workflow version control in workflows: "keep latest version" can be selected, removing the need for manual updates.
+4. More audit logs.
+5. An asynchronous document-parsing queue for datasets: imports no longer wait for parsing to complete.
 
 ## ⚙️ Improvements
 
@@ -22,4 +40,6 @@
 
 1. In workflows, global system tools declared by an administrator could not be version-managed.
 2. Broken context when an interactive node ran before a tool-call node.
 3. Backup import failed to chunk content under 1,000 characters.
-4. Custom PDF parsing could not save base64 images.
\ No newline at end of file
+4. Custom PDF parsing could not save base64 images.
+5. Non-streaming requests did not perform CITE marker replacement.
+6. Hidden risk in the Python sandbox.
\ No newline at end of file
diff --git a/docSite/content/zh-cn/docs/development/upgrading/494.md b/docSite/content/zh-cn/docs/development/upgrading/494.md
index a07d37bcc..524a34519 100644
--- a/docSite/content/zh-cn/docs/development/upgrading/494.md
+++ b/docSite/content/zh-cn/docs/development/upgrading/494.md
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.4'
+title: 'V4.9.4 (includes an upgrade script)'
 description: 'FastGPT V4.9.4 release notes'
 icon: 'upgrade'
 draft: false
diff --git a/docSite/content/zh-cn/docs/guide/knowledge_base/template.md b/docSite/content/zh-cn/docs/guide/knowledge_base/template.md
new file mode 100644
index 000000000..e6893225c
--- /dev/null
+++ b/docSite/content/zh-cn/docs/guide/knowledge_base/template.md
@@ -0,0 +1,118 @@
+---
+title: 'Template import'
+description: 'Introduction to FastGPT template import and how to use it'
+icon: 'language'
+draft: false
+toc: true
+weight: 420
+---
+
+## Background
+
+FastGPT provides a template import feature that lets users bulk-import Q&A data through a preset CSV template. It is especially suitable for users who already have structured Q&A data and want to load it into a dataset quickly.
+
+## Template structure
+
+The template is a CSV file with the following columns:
+
+- q: the question column, holding questions users may ask
+- a: the answer column, holding the standard answer to the corresponding question
+- indexes: the index column, holding indexes related to the question
+
+### Sample data
+
+```csv
+q,a,indexes
+"Who are you?","I am an AI assistant, always ready to chat with you and answer your questions — study topics, everyday questions, or creative brainstorming. I use my ""knowledge brain"" to help and keep you company, and I hope to be a good partner for exploring the world.","1. What are you?\n2. What can you do?\n3. What kinds of questions can you answer?\n4. What kind of partner do you want to be?\n5. How do you help?"
+"What are you?","I am an AI assistant, ready to chat with users and answer their questions — study topics, everyday questions, and creative brainstorming — using my ""knowledge brain"" to help and keep them company.","What are you?"
+"What can you do?","I can chat with users, answer their questions, and join discussions about study topics, everyday questions, and creative ideas, using my ""knowledge brain"" to help.","What can you do?"
+```
+
+## Usage
+
+### 1. Open the dataset, click Import, and choose template import
+
+![](/imgs/template/import.png)
+
+![](/imgs/template/box.png)
+
+### 2. Download the template
+
+Click to download the CSV template, which covers two modes of content.
+
+#### Template for normal mode
+
+![](/imgs/template/nomal.png)
+
+The corresponding CSV format:
+
+![](/imgs/template/nomal_data.png)
+
+In normal mode, q is the content, a is empty, and there can be multiple indexes.
+
+#### Template for Q&A pairs
+
+![](/imgs/template/Question-answer.png)
+
+The corresponding CSV format:
+
+![](/imgs/template/Question-answer_data.png)
+
+In Q&A mode, q is the question, a is the answer, and indexes is the index part.
+
+### 3. Fill in the data
+
+Fill in your Q&A data following the template format:
+- Each row is one piece of content or one Q&A pair
+- The question (q) must never be empty
+- Within a row, more indexes can be appended after the existing ones
+
+### 4. Import limits
+
+- Only CSV files are supported
+- A single file is limited to 100MB
+- The template format must be followed strictly, otherwise the import may fail
+- Only one file can be imported at a time
+
+A successful import looks like this:
+
+![](/imgs/template/import_csv.png)
+
+### 5. Notes
+
+- Make sure the CSV file is UTF-8 encoded
+- If the content contains commas, wrap the whole field in double quotes; double any quotes that appear inside a quoted field
+- The indexes column is used as indexes for the related question and helps retrieval accuracy
+- Test a small sample before importing large amounts of data
+
+## Best practices
+
+1. **Data preparation**
+   - Ensure the quality of the content or Q&A pairs; answers should be clear and accurate
+   - Add suitable index keywords for each imported entry
+   - Avoid duplicate content or Q&A pairs
+
+2. **Format checks**
+   - Check that the CSV format is correct before importing
+   - Make sure there are no extra blank lines or spaces
+   - Verify that special characters are escaped correctly
+
+3. **Batch imports**
+   - For large datasets, import in batches
+   - Verify the data after each batch
+
+## FAQ
+
+Q: Why did my file fail to import?
+A: Check the following:
+- The file format is CSV
+- The encoding is UTF-8
+- The template format is followed strictly
+- The file size is within the limit
+
+Q: How do I verify that the import succeeded?
+A: After a successful import, you can:
+- Search the dataset for the imported questions
+- Test answer accuracy in a conversation
+- Check the dataset's data statistics
\ No newline at end of file
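To avoid the quoting and encoding pitfalls listed above, the template CSV can be generated programmatically. The sketch below is illustrative only: the `q,a,indexes` header follows the template, and the literal `\n` index separator mirrors the sample data, but check both against the template version you downloaded.

```ts
// Sketch: build a template CSV that satisfies the rules above
// (UTF-8, header row `q,a,indexes`, proper quoting).
import { writeFileSync } from 'fs';

type TemplateRow = { q: string; a?: string; indexes?: string[] };

// RFC 4180 quoting: wrap every field in double quotes and double any
// embedded quotes, so commas and quotes inside content stay safe.
const csvField = (value: string) => `"${value.replace(/"/g, '""')}"`;

const toTemplateCsv = (rows: TemplateRow[]) =>
  ['q,a,indexes']
    .concat(
      rows.map((row) =>
        [
          csvField(row.q), // q must never be empty
          csvField(row.a ?? ''), // empty in normal mode
          // The sample data separates indexes with a literal "\n" sequence.
          csvField((row.indexes ?? []).join('\\n'))
        ].join(',')
      )
    )
    .join('\n');

const rows: TemplateRow[] = [
  {
    q: 'What can you do?',
    a: 'I can chat with users and answer their questions.',
    indexes: ['What can you do?', 'What are your capabilities?']
  }
];

// Prefix a BOM so spreadsheet tools reliably detect UTF-8.
writeFileSync('template.csv', '\uFEFF' + toTemplateCsv(rows), 'utf8');
```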
diff --git a/docSite/content/zh-cn/docs/guide/knowledge_base/third_dataset.md b/docSite/content/zh-cn/docs/guide/knowledge_base/third_dataset.md
new file mode 100644
index 000000000..dc93dd5fd
--- /dev/null
+++ b/docSite/content/zh-cn/docs/guide/knowledge_base/third_dataset.md
@@ -0,0 +1,162 @@
+---
+title: 'Third-party dataset development'
+description: 'A detailed guide to integrating your own third-party document library into FastGPT'
+icon: 'language'
+draft: false
+toc: true
+weight: 410
+---
+
+There are many document libraries on the internet, such as Feishu and Yuque, and different FastGPT users use different ones. FastGPT ships with built-in Feishu and Yuque integrations; to integrate another document library, follow this guide.
+
+## A unified interface contract
+
+To integrate different document libraries uniformly, FastGPT standardizes the third-party document library interface into 4 endpoints; see the [API file library interface](/docs/guide/knowledge_base/api_dataset).
+
+All built-in document libraries extend the standard API file library. You can use the code in `FastGPT/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts` as a reference for extending other document libraries. Four interfaces need to be implemented:
+
+1. List files
+2. Get file content / a file link
+3. Get the preview URL of the original document
+4. Get file details
+
+## Starting a third-party file library
+
+For illustration, this guide adds a Feishu knowledge base ( FeishuKnowledgeDataset ).
+
+### 1. Add the third-party library's parameters
+
+First, open `FastGPT\packages\global\core\dataset\apiDataset.d.ts` and add a Server type for the third-party document library. You design the fields of this type yourself, based on what your integration needs. For example, the Yuque integration needs `userId` and `token` as credentials.
+
+```ts
+export type YuqueServer = {
+  userId: string;
+  token?: string;
+  basePath?: string;
+};
+```
+
+{{% alert icon="🤖 " context="success" %}}
+If the document library supports picking a root directory, add a `basePath` field. [See the root-directory feature](/docs/guide/knowledge_base/third_dataset/#添加配置表单)
+{{% /alert %}}
+
+![](/imgs/thirddataset-1.png)
+
+### 2. Create the hook file
+
+Each third-party document library maintains its API as a hook containing 4 functions.
+
+- Create a folder for the library under `FastGPT\packages\service\core\dataset\apiDataset\`, then create an `api.ts` file inside it
+- In `api.ts`, implement 4 functions (a minimal skeleton is sketched right after this list):
+  - `listFiles`: list files
+  - `getFileContent`: get file content / a file link
+  - `getFileDetail`: get file details
+  - `getFilePreviewUrl`: get the preview URL of the original document
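A minimal skeleton of such a hook, assuming a hypothetical `FeishuKnowledgeServer` config type declared in step 1. The four exported functions mirror the contract above, but the exact signatures and return shapes are illustrative; align them with the `yuqueDataset/api.ts` implementation you are copying from.

```ts
// packages/service/core/dataset/apiDataset/feishuKnowledgeDataset/api.ts (sketch)
import type { APIFileItem } from '@fastgpt/global/core/dataset/apiDataset/type';
import type { ParentIdType } from '@fastgpt/global/common/parentFolder/type';

// Hypothetical credentials for the new library (declared in step 1).
export type FeishuKnowledgeServer = {
  appId: string;
  appSecret?: string;
  basePath?: string; // only if the library supports picking a root directory
};

export const useFeishuKnowledgeDataset = (server: FeishuKnowledgeServer) => {
  // 1. File list: return the files/folders under parentId
  const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
    // call the upstream API with server.appId / server.appSecret here
    return [];
  };

  // 2. File content: return raw text (or a downloadable link) for one file
  const getFileContent = async ({ apiFileId }: { apiFileId: string }) => {
    return { title: '', rawText: '' };
  };

  // 3. File detail: metadata for a single file (used by root-directory picking)
  const getFileDetail = async ({ apiFileId }: { apiFileId: string }) => {
    return { id: apiFileId, name: '', type: 'file' as const };
  };

  // 4. Preview URL: where "view original document" should link to
  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }) => {
    return `https://example.com/preview/${apiFileId}`;
  };

  return { listFiles, getFileContent, getFileDetail, getFilePreviewUrl };
};
```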
+
+### 3. Add the dataset type
+
+Import the type you created into `FastGPT\packages\global\core\dataset\type.d.ts`.
+
+![](/imgs/thirddataset-2.png)
+
+### 4. Add dataset data fetching
+
+Add the following to `FastGPT\packages\global\core\dataset\apiDataset\utils.ts`.
+
+![](/imgs/thirddataset-3.png)
+
+### 5. Add the dataset invocation method
+
+Add the following to `FastGPT\packages\service\core\dataset\apiDataset\index.ts`.
+
+![](/imgs/thirddataset-4.png)
+
+## Adding the frontend
+
+Add your I18n translations in `FastGPT\packages\web\i18n\zh-CN\dataset.json`, `FastGPT\packages\web\i18n\en\dataset.json`, and `FastGPT\packages\web\i18n\zh-Hant\dataset.json`. Taking the Chinese translation as an example, roughly the following entries are needed:
+
+![](/imgs/thirddataset-5.png)
+
+Add the following to `FastGPT\packages\service\support\operationLog\util.ts` so the I18n translations can be resolved.
+
+![](/imgs/thirddataset-6.png)
+
+{{% alert icon="🤖 " context="success" %}}
+These I18n translations live in `FastGPT\packages\web\i18n\zh-Hant\account_team.json`, `FastGPT\packages\web\i18n\zh-CN\account_team.json`, and `FastGPT\packages\web\i18n\en\account_team.json`. The key format is `dataset.XXX_dataset`; for the Feishu knowledge base, the key is `dataset.feishu_knowledge_dataset`.
+{{% /alert %}}
+
+Add your dataset icons under `FastGPT\packages\web\components\common\Icon\icons\core\dataset\` — two of them, `Outline` and `Color`, monochrome and colored respectively, as shown below.
+
+![](/imgs/thirddataset-7.png)
+
+Register your icons in `FastGPT\packages\web\components\common\Icon\constants.ts`. The `import` path is where the icon is stored.
+
+![](/imgs/thirddataset-8.png)
+
+Add your dataset type in `FastGPT\packages\global\core\dataset\constants.ts`, in both `DatasetTypeEnum` and `ApiDatasetTypeMap` (a sketch of such an entry follows below).
+
+| | |
+| --- | --- |
+| ![](/imgs/thirddataset-9.png) | ![](/imgs/thirddataset-10.png) |
+
+{{% alert icon="🤖 " context="success" %}}
+The `courseUrl` field points to the matching documentation, if any; docs live under `FastGPT\docSite\content\zh-cn\docs\guide\knowledge_base\`.
+The `label` is the dataset name you added earlier through the i18n translations.
+`icon` and `avatar` are the two icons you added earlier.
+{{% /alert %}}
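A sketch of that registration, following the shape of the existing feishu/yuque entries visible later in this patch. The enum member, icon names, i18n key, and `courseUrl` are hypothetical placeholders for your own library.

```ts
// packages/global/core/dataset/constants.ts (sketch)
import { i18nT } from '../../../web/i18n/utils'; // same import the file already uses

export enum DatasetTypeEnum {
  // ...existing members...
  feishuKnowledge = 'feishuKnowledge'
}

export const ApiDatasetTypeMap = {
  // ...existing entries...
  [DatasetTypeEnum.feishuKnowledge]: {
    icon: 'core/dataset/feishuKnowledgeDatasetOutline', // Outline icon registered earlier
    avatar: 'core/dataset/feishuKnowledgeDatasetColor', // Color icon registered earlier
    label: i18nT('dataset:feishu_knowledge_dataset'), // i18n key added earlier
    collectionLabel: i18nT('common:File'),
    courseUrl: '/docs/guide/knowledge_base/feishu_knowledge/' // optional doc link
  }
};
```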
+
+In `FastGPT\projects\app\src\pages\dataset\list\index.tsx`, add the following. This file backs the menu that appears after clicking `Create` on the dataset list page; a dataset type can only be created once it is added here.
+
+![](/imgs/thirddataset-11.png)
+
+In `FastGPT\projects\app\src\pageComponents\dataset\detail\Info\index.tsx`, add the following. It corresponds to this part of the UI:
+
+| | |
+| --- | --- |
+| ![](/imgs/thirddataset-12.png) | ![](/imgs/thirddataset-13.png) |
+
+## Adding the configuration form
+
+In `FastGPT\projects\app\src\pageComponents\dataset\ApiDatasetForm.tsx`, add the following. This file backs the field inputs on the dataset creation page.
+
+| | | |
+| --- | --- | --- |
+| ![](/imgs/thirddataset-14.png) | ![](/imgs/thirddataset-15.png) | ![](/imgs/thirddataset-16.png) |
+
+The two components added in the code render the root-directory picker, which relies on the `getFileDetail` method of your API design; if your library does not support it, you can skip them.
+
+```
+{renderBaseUrlSelector()} // renders the `Base URL` field
+{renderDirectoryModal()}  // the "choose root directory" dialog shown after clicking `Select`, see the images
+```
+
+| | |
+| --- | --- |
+| ![](/imgs/thirddataset-17.png) | ![](/imgs/thirddataset-18.png) |
+
+If the dataset should support a root directory, also add the following to `ApiDatasetForm`.
+
+### 1. Parse the dataset type
+
+Parse your dataset type out of `apiDatasetServer`, as shown:
+
+![](/imgs/thirddataset-19.png)
+
+### 2. Add the root-directory selection logic and the `parentId` assignment logic
+
+Add the root-directory selection logic to make sure the user has already filled in the fields required by the API methods being called, such as the Token.
+
+![](/imgs/thirddataset-20.png)
+
+### 3. Add field checks and assignment logic
+
+Before calling the methods, check again that all required fields have been obtained; after the root directory is chosen, assign its value to the corresponding field.
+
+![](/imgs/thirddataset-21.png)
+
+## Tips
+
+After creating the dataset, test its features end to end to make sure nothing is missing. If your integration misbehaves and the docs don't cover the problem, some wiring is almost certainly incomplete: do a global search for `YuqueServer` and `yuqueServer` again and check every place where your own type still needs to be added.
\ No newline at end of file
diff --git a/packages/global/common/file/icon.ts b/packages/global/common/file/icon.ts
index 40928c5c2..f2baf7613 100644
--- a/packages/global/common/file/icon.ts
+++ b/packages/global/common/file/icon.ts
@@ -6,7 +6,8 @@ export const fileImgs = [
   { suffix: '(doc|docs)', src: 'file/fill/doc' },
   { suffix: 'txt', src: 'file/fill/txt' },
   { suffix: 'md', src: 'file/fill/markdown' },
-  { suffix: 'html', src: 'file/fill/html' }
+  { suffix: 'html', src: 'file/fill/html' },
+  { suffix: '(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|tif)', src: 'image' }
 
   // { suffix: '.', src: '/imgs/files/file.svg' }
 ];
diff --git a/packages/global/common/frequenctLimit/type.d.ts b/packages/global/common/frequenctLimit/type.d.ts
index 3f326a40c..fb5bf2105 100644
--- a/packages/global/common/frequenctLimit/type.d.ts
+++ b/packages/global/common/frequenctLimit/type.d.ts
@@ -2,4 +2,5 @@ export type AuthFrequencyLimitProps = {
   eventId: string;
   maxAmount: number;
   expiredTime: Date;
+  num?: number;
 };
diff --git a/packages/global/common/string/tools.ts b/packages/global/common/string/tools.ts
index 39ca3d24c..dc4c845a9 100644
--- a/packages/global/common/string/tools.ts
+++ b/packages/global/common/string/tools.ts
@@ -34,7 +34,7 @@ export const valToStr = (val: any) => {
 };
 
 // replace {{variable}} to value
-export function replaceVariable(text: any, obj: Record<string, string | number>) {
+export function replaceVariable(text: any, obj: Record<string, any>) {
   if (typeof text !== 'string') return text;
 
   for (const key in obj) {
diff --git a/packages/global/core/dataset/api.d.ts b/packages/global/core/dataset/api.d.ts
index 92dc32ed3..7dda0f15e 100644
--- a/packages/global/core/dataset/api.d.ts
+++ b/packages/global/core/dataset/api.d.ts
@@ -1,4 +1,9 @@
-import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
+import type {
+  ChunkSettingsType,
+  DatasetDataIndexItemType,
+  DatasetDataFieldType,
+  DatasetSchemaType
+} from './type';
 import type {
   DatasetCollectionTypeEnum,
   DatasetCollectionDataProcessModeEnum,
@@ -7,12 +12,14 @@ import type {
   ChunkTriggerConfigTypeEnum,
   ParagraphChunkAIModeEnum
 } from './constants';
-import type { LLMModelItemType } from '../ai/model.d';
-import type { ParentIdType } from 'common/parentFolder/type';
+import type { ParentIdType } from '../../common/parentFolder/type';
 
 /* ================= dataset ===================== */
 export type DatasetUpdateBody = {
   id: string;
+
+  apiDatasetServer?: DatasetSchemaType['apiDatasetServer'];
+
   parentId?: ParentIdType;
   name?: string;
   avatar?: string;
@@ -24,9 +31,6 @@ export type DatasetUpdateBody = {
   websiteConfig?: DatasetSchemaType['websiteConfig'];
   externalReadUrl?: DatasetSchemaType['externalReadUrl'];
   defaultPermission?: DatasetSchemaType['defaultPermission'];
-  apiServer?: DatasetSchemaType['apiServer'];
-  yuqueServer?: DatasetSchemaType['yuqueServer'];
-  feishuServer?: DatasetSchemaType['feishuServer'];
 
   chunkSettings?: DatasetSchemaType['chunkSettings'];
 
   // sync schedule
@@ -100,6 +104,9 @@ export type ExternalFileCreateDatasetCollectionParams = ApiCreateDatasetCollecti
   externalFileUrl: string;
   filename?: string;
 };
+export type ImageCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
+  collectionName: string;
+};
 
 /* ================= tag ===================== */
 export type
CreateDatasetCollectionTagParams = { @@ -125,8 +132,9 @@ export type PgSearchRawType = { score: number; }; export type PushDatasetDataChunkProps = { - q: string; // embedding content - a?: string; // bonus content + q?: string; + a?: string; + imageId?: string; chunkIndex?: number; indexes?: Omit[]; }; diff --git a/packages/global/core/dataset/apiDataset.d.ts b/packages/global/core/dataset/apiDataset/type.d.ts similarity index 75% rename from packages/global/core/dataset/apiDataset.d.ts rename to packages/global/core/dataset/apiDataset/type.d.ts index 524b18674..af3c94ac4 100644 --- a/packages/global/core/dataset/apiDataset.d.ts +++ b/packages/global/core/dataset/apiDataset/type.d.ts @@ -1,5 +1,5 @@ -import { RequireOnlyOne } from '../../common/type/utils'; -import type { ParentIdType } from '../../common/parentFolder/type.d'; +import { RequireOnlyOne } from '../../../common/type/utils'; +import type { ParentIdType } from '../../../common/parentFolder/type'; export type APIFileItem = { id: string; @@ -28,6 +28,12 @@ export type YuqueServer = { basePath?: string; }; +export type ApiDatasetServerType = { + apiServer?: APIFileServer; + feishuServer?: FeishuServer; + yuqueServer?: YuqueServer; +}; + // Api dataset api export type APIFileListResponse = APIFileItem[]; diff --git a/packages/global/core/dataset/apiDataset/utils.ts b/packages/global/core/dataset/apiDataset/utils.ts new file mode 100644 index 000000000..3eeb38f44 --- /dev/null +++ b/packages/global/core/dataset/apiDataset/utils.ts @@ -0,0 +1,31 @@ +import type { ApiDatasetServerType } from './type'; + +export const filterApiDatasetServerPublicData = (apiDatasetServer?: ApiDatasetServerType) => { + if (!apiDatasetServer) return undefined; + + const { apiServer, yuqueServer, feishuServer } = apiDatasetServer; + + return { + apiServer: apiServer + ? { + baseUrl: apiServer.baseUrl, + authorization: '', + basePath: apiServer.basePath + } + : undefined, + yuqueServer: yuqueServer + ? { + userId: yuqueServer.userId, + token: '', + basePath: yuqueServer.basePath + } + : undefined, + feishuServer: feishuServer + ? 
{ + appId: feishuServer.appId, + appSecret: '', + folderToken: feishuServer.folderToken + } + : undefined + }; +}; diff --git a/packages/global/core/dataset/constants.ts b/packages/global/core/dataset/constants.ts index 25acca563..8e74b7c2a 100644 --- a/packages/global/core/dataset/constants.ts +++ b/packages/global/core/dataset/constants.ts @@ -6,45 +6,80 @@ export enum DatasetTypeEnum { dataset = 'dataset', websiteDataset = 'websiteDataset', // depp link externalFile = 'externalFile', + apiDataset = 'apiDataset', feishu = 'feishu', yuque = 'yuque' } -export const DatasetTypeMap = { + +// @ts-ignore +export const ApiDatasetTypeMap: Record< + `${DatasetTypeEnum}`, + { + icon: string; + avatar: string; + label: any; + collectionLabel: string; + courseUrl?: string; + } +> = { + [DatasetTypeEnum.apiDataset]: { + icon: 'core/dataset/externalDatasetOutline', + avatar: 'core/dataset/externalDatasetColor', + label: i18nT('dataset:api_file'), + collectionLabel: i18nT('common:File'), + courseUrl: '/docs/guide/knowledge_base/api_dataset/' + }, + [DatasetTypeEnum.feishu]: { + icon: 'core/dataset/feishuDatasetOutline', + avatar: 'core/dataset/feishuDatasetColor', + label: i18nT('dataset:feishu_dataset'), + collectionLabel: i18nT('common:File'), + courseUrl: '/docs/guide/knowledge_base/lark_dataset/' + }, + [DatasetTypeEnum.yuque]: { + icon: 'core/dataset/yuqueDatasetOutline', + avatar: 'core/dataset/yuqueDatasetColor', + label: i18nT('dataset:yuque_dataset'), + collectionLabel: i18nT('common:File'), + courseUrl: '/docs/guide/knowledge_base/yuque_dataset/' + } +}; +export const DatasetTypeMap: Record< + `${DatasetTypeEnum}`, + { + icon: string; + avatar: string; + label: any; + collectionLabel: string; + courseUrl?: string; + } +> = { + ...ApiDatasetTypeMap, [DatasetTypeEnum.folder]: { icon: 'common/folderFill', + avatar: 'common/folderFill', label: i18nT('dataset:folder_dataset'), collectionLabel: i18nT('common:Folder') }, [DatasetTypeEnum.dataset]: { icon: 'core/dataset/commonDatasetOutline', + avatar: 'core/dataset/commonDatasetColor', label: i18nT('dataset:common_dataset'), collectionLabel: i18nT('common:File') }, [DatasetTypeEnum.websiteDataset]: { icon: 'core/dataset/websiteDatasetOutline', + avatar: 'core/dataset/websiteDatasetColor', label: i18nT('dataset:website_dataset'), - collectionLabel: i18nT('common:Website') + collectionLabel: i18nT('common:Website'), + courseUrl: '/docs/guide/knowledge_base/websync/' }, [DatasetTypeEnum.externalFile]: { icon: 'core/dataset/externalDatasetOutline', + avatar: 'core/dataset/externalDatasetColor', label: i18nT('dataset:external_file'), collectionLabel: i18nT('common:File') - }, - [DatasetTypeEnum.apiDataset]: { - icon: 'core/dataset/externalDatasetOutline', - label: i18nT('dataset:api_file'), - collectionLabel: i18nT('common:File') - }, - [DatasetTypeEnum.feishu]: { - icon: 'core/dataset/feishuDatasetOutline', - label: i18nT('dataset:feishu_dataset'), - collectionLabel: i18nT('common:File') - }, - [DatasetTypeEnum.yuque]: { - icon: 'core/dataset/yuqueDatasetOutline', - label: i18nT('dataset:yuque_dataset'), - collectionLabel: i18nT('common:File') } }; @@ -77,7 +112,8 @@ export enum DatasetCollectionTypeEnum { file = 'file', link = 'link', // one link externalFile = 'externalFile', - apiFile = 'apiFile' + apiFile = 'apiFile', + images = 'images' } export const DatasetCollectionTypeMap = { [DatasetCollectionTypeEnum.folder]: { @@ -93,10 +129,13 @@ export const DatasetCollectionTypeMap = { name: i18nT('common:core.dataset.link') }, 
[DatasetCollectionTypeEnum.virtual]: { - name: i18nT('common:core.dataset.Manual collection') + name: i18nT('dataset:empty_collection') }, [DatasetCollectionTypeEnum.apiFile]: { name: i18nT('common:core.dataset.apiFile') + }, + [DatasetCollectionTypeEnum.images]: { + name: i18nT('dataset:core.dataset.Image collection') } }; @@ -120,7 +159,10 @@ export const DatasetCollectionSyncResultMap = { export enum DatasetCollectionDataProcessModeEnum { chunk = 'chunk', qa = 'qa', + imageParse = 'imageParse', + backup = 'backup', + template = 'template', auto = 'auto' // abandon } @@ -133,13 +175,22 @@ export const DatasetCollectionDataProcessModeMap = { label: i18nT('common:core.dataset.training.QA mode'), tooltip: i18nT('common:core.dataset.import.QA Import Tip') }, - [DatasetCollectionDataProcessModeEnum.backup]: { - label: i18nT('dataset:backup_mode'), - tooltip: i18nT('dataset:backup_mode') + [DatasetCollectionDataProcessModeEnum.imageParse]: { + label: i18nT('dataset:training.Image mode'), + tooltip: i18nT('common:core.dataset.import.Chunk Split Tip') }, [DatasetCollectionDataProcessModeEnum.auto]: { label: i18nT('common:core.dataset.training.Auto mode'), tooltip: i18nT('common:core.dataset.training.Auto mode Tip') + }, + + [DatasetCollectionDataProcessModeEnum.backup]: { + label: i18nT('dataset:backup_mode'), + tooltip: i18nT('dataset:backup_mode') + }, + [DatasetCollectionDataProcessModeEnum.template]: { + label: i18nT('dataset:template_mode'), + tooltip: i18nT('dataset:template_mode') } }; @@ -172,14 +223,17 @@ export enum ImportDataSourceEnum { fileCustom = 'fileCustom', externalFile = 'externalFile', apiDataset = 'apiDataset', - reTraining = 'reTraining' + reTraining = 'reTraining', + imageDataset = 'imageDataset' } export enum TrainingModeEnum { + parse = 'parse', chunk = 'chunk', qa = 'qa', auto = 'auto', - image = 'image' + image = 'image', + imageParse = 'imageParse' } /* ------------ search -------------- */ diff --git a/packages/global/core/dataset/controller.d.ts b/packages/global/core/dataset/controller.d.ts index 7a90ae5eb..3edf0954a 100644 --- a/packages/global/core/dataset/controller.d.ts +++ b/packages/global/core/dataset/controller.d.ts @@ -8,17 +8,19 @@ export type CreateDatasetDataProps = { chunkIndex?: number; q: string; a?: string; + imageId?: string; indexes?: Omit[]; }; export type UpdateDatasetDataProps = { dataId: string; - q?: string; + q: string; a?: string; indexes?: (Omit & { dataId?: string; // pg data id })[]; + imageId?: string; }; export type PatchIndexesProps = diff --git a/packages/global/core/dataset/image/type.d.ts b/packages/global/core/dataset/image/type.d.ts new file mode 100644 index 000000000..ed6dabe58 --- /dev/null +++ b/packages/global/core/dataset/image/type.d.ts @@ -0,0 +1,13 @@ +export type DatasetImageSchema = { + _id: string; + teamId: string; + datasetId: string; + collectionId?: string; + name: string; + contentType: string; + size: number; + metadata?: Record; + expiredTime?: Date; + createdAt: Date; + updatedAt: Date; +}; diff --git a/packages/global/core/dataset/training/utils.ts b/packages/global/core/dataset/training/utils.ts index 2686a4047..d98390e9c 100644 --- a/packages/global/core/dataset/training/utils.ts +++ b/packages/global/core/dataset/training/utils.ts @@ -1,3 +1,4 @@ +import { getEmbeddingModel } from '../../../../service/core/ai/model'; import { type EmbeddingModelItemType, type LLMModelItemType } from '../../../core/ai/model.d'; import { ChunkSettingModeEnum, @@ -26,11 +27,17 @@ export const getLLMMaxChunkSize = (model?: 
LLMModelItemType) => { }; // Index size -export const getMaxIndexSize = (model?: EmbeddingModelItemType) => { - return model?.maxToken || 512; +export const getMaxIndexSize = (model?: EmbeddingModelItemType | string) => { + if (!model) return 512; + const modelData = typeof model === 'string' ? getEmbeddingModel(model) : model; + + return modelData?.maxToken || 512; }; -export const getAutoIndexSize = (model?: EmbeddingModelItemType) => { - return model?.defaultToken || 512; +export const getAutoIndexSize = (model?: EmbeddingModelItemType | string) => { + if (!model) return 512; + + const modelData = typeof model === 'string' ? getEmbeddingModel(model) : model; + return modelData?.defaultToken || 512; }; const indexSizeSelectList = [ diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts index f3424eaa3..664d66606 100644 --- a/packages/global/core/dataset/type.d.ts +++ b/packages/global/core/dataset/type.d.ts @@ -13,9 +13,15 @@ import type { ChunkTriggerConfigTypeEnum } from './constants'; import type { DatasetPermission } from '../../support/permission/dataset/controller'; -import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset'; +import type { + ApiDatasetServerType, + APIFileServer, + FeishuServer, + YuqueServer +} from './apiDataset/type'; import type { SourceMemberType } from 'support/user/type'; import type { DatasetDataIndexTypeEnum } from './data/constants'; +import type { ParentIdType } from 'common/parentFolder/type'; export type ChunkSettingsType = { trainingType?: DatasetCollectionDataProcessModeEnum; @@ -49,7 +55,7 @@ export type ChunkSettingsType = { export type DatasetSchemaType = { _id: string; - parentId?: string; + parentId: ParentIdType; userId: string; teamId: string; tmbId: string; @@ -72,14 +78,16 @@ export type DatasetSchemaType = { chunkSettings?: ChunkSettingsType; inheritPermission: boolean; - apiServer?: APIFileServer; - feishuServer?: FeishuServer; - yuqueServer?: YuqueServer; + + apiDatasetServer?: ApiDatasetServerType; // abandon autoSync?: boolean; externalReadUrl?: string; defaultPermission?: number; + apiServer?: APIFileServer; + feishuServer?: FeishuServer; + yuqueServer?: YuqueServer; }; export type DatasetCollectionSchemaType = ChunkSettingsType & { @@ -132,7 +140,13 @@ export type DatasetDataIndexItemType = { dataId: string; // pg data id text: string; }; -export type DatasetDataSchemaType = { + +export type DatasetDataFieldType = { + q: string; // large chunks or question + a?: string; // answer or custom content + imageId?: string; +}; +export type DatasetDataSchemaType = DatasetDataFieldType & { _id: string; userId: string; teamId: string; @@ -141,13 +155,9 @@ export type DatasetDataSchemaType = { collectionId: string; chunkIndex: number; updateTime: Date; - q: string; // large chunks or question - a: string; // answer or custom content - history?: { - q: string; - a: string; + history?: (DatasetDataFieldType & { updateTime: Date; - }[]; + })[]; forbid?: boolean; fullTextToken: string; indexes: DatasetDataIndexItemType[]; @@ -174,11 +184,12 @@ export type DatasetTrainingSchemaType = { expireAt: Date; lockTime: Date; mode: TrainingModeEnum; - model: string; - prompt: string; + model?: string; + prompt?: string; dataId?: string; q: string; a: string; + imageId?: string; chunkIndex: number; indexSize?: number; weight: number; @@ -244,20 +255,18 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & { }; /* ================= data ===================== */ -export type 
DatasetDataItemType = { +export type DatasetDataItemType = DatasetDataFieldType & { id: string; teamId: string; datasetId: string; + imagePreivewUrl?: string; updateTime: Date; collectionId: string; sourceName: string; sourceId?: string; - q: string; - a: string; chunkIndex: number; indexes: DatasetDataIndexItemType[]; isOwner: boolean; - // permission: DatasetPermission; }; /* --------------- file ---------------------- */ @@ -284,3 +293,14 @@ export type SearchDataResponseItemType = Omit< score: { type: `${SearchScoreTypeEnum}`; value: number; index: number }[]; // score: number; }; + +export type DatasetCiteItemType = { + _id: string; + q: string; + a?: string; + imagePreivewUrl?: string; + history?: DatasetDataSchemaType['history']; + updateTime: DatasetDataSchemaType['updateTime']; + index: DatasetDataSchemaType['chunkIndex']; + updated?: boolean; +}; diff --git a/packages/global/core/dataset/utils.ts b/packages/global/core/dataset/utils.ts index 17167b12a..42dbc2315 100644 --- a/packages/global/core/dataset/utils.ts +++ b/packages/global/core/dataset/utils.ts @@ -2,10 +2,15 @@ import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants'; import { getFileIcon } from '../../common/file/icon'; import { strIsLink } from '../../common/string/tools'; -export function getCollectionIcon( - type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file, - name = '' -) { +export function getCollectionIcon({ + type = DatasetCollectionTypeEnum.file, + name = '', + sourceId +}: { + type?: DatasetCollectionTypeEnum; + name?: string; + sourceId?: string; +}) { if (type === DatasetCollectionTypeEnum.folder) { return 'common/folderFill'; } @@ -15,7 +20,10 @@ export function getCollectionIcon( if (type === DatasetCollectionTypeEnum.virtual) { return 'file/fill/manual'; } - return getFileIcon(name); + if (type === DatasetCollectionTypeEnum.images) { + return 'core/dataset/imageFill'; + } + return getSourceNameIcon({ sourceName: name, sourceId }); } export function getSourceNameIcon({ sourceName, diff --git a/packages/plugins/src/bocha/template.json b/packages/plugins/src/bocha/template.json index dc84bf4ec..614bec8bc 100644 --- a/packages/plugins/src/bocha/template.json +++ b/packages/plugins/src/bocha/template.json @@ -1,677 +1,489 @@ { - "author": "", - "name": "博查搜索", - "avatar": "core/workflow/template/bocha", - "intro": "使用博查AI搜索引擎进行网络搜索。", - "showStatus": true, - "weight": 10, - "courseUrl": "", - "isTool": true, - "templateType": "search", - "workflow": { - "nodes": [ - { - "nodeId": "pluginInput", - "name": "workflow:template.plugin_start", - "intro": "workflow:intro_plugin_input", - "avatar": "core/workflow/template/workflowStart", - "flowNodeType": "pluginInput", - "showStatus": false, - "position": { - "x": 636.3048409085379, - "y": -238.61714728578016 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": [ - "input" - ], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "apiKey", - "label": "apiKey", - "description": "博查API密钥", - "defaultValue": "", - "required": true - }, - { - "renderTypeList": [ - "input", - "reference" - ], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "query", - "label": "query", - "description": "搜索查询词", - "defaultValue": "", - "required": true, - "toolDescription": "搜索查询词" - }, - { - "renderTypeList": [ - "input", - "reference" - ], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "freshness", - "label": "freshness", - "description": 
"搜索指定时间范围内的网页。可填值:oneDay(一天内)、oneWeek(一周内)、oneMonth(一个月内)、oneYear(一年内)、noLimit(不限,默认)、YYYY-MM-DD..YYYY-MM-DD(日期范围)、YYYY-MM-DD(指定日期)", - "defaultValue": "noLimit", - "required": false, - "toolDescription": "搜索时间范围" - }, - { - "renderTypeList": [ - "input", - "reference" - ], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "summary", - "label": "summary", - "description": "是否显示文本摘要。true显示,false不显示(默认)", - "defaultValue": false, - "required": false, - "toolDescription": "是否显示文本摘要" - }, - { - "renderTypeList": [ - "input", - "reference" - ], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "include", - "label": "include", - "description": "指定搜索的site范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com", - "defaultValue": "", - "required": false, - "toolDescription": "指定搜索的site范围" - }, - { - "renderTypeList": [ - "input", - "reference" - ], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "exclude", - "label": "exclude", - "description": "排除搜索的网站范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com", - "defaultValue": "", - "required": false, - "toolDescription": "排除搜索的网站范围" - }, - { - "renderTypeList": [ - "input", - "reference" - ], - "selectedTypeIndex": 0, - "valueType": "number", - "canEdit": true, - "key": "count", - "label": "count", - "description": "返回结果的条数。可填范围:1-50,默认为10", - "defaultValue": 10, - "required": false, - "min": 1, - "max": 50, - "toolDescription": "返回结果条数" - } - ], - "outputs": [ - { - "id": "apiKey", - "valueType": "string", - "key": "apiKey", - "label": "apiKey", - "type": "hidden" - }, - { - "id": "query", - "valueType": "string", - "key": "query", - "label": "query", - "type": "hidden" - }, - { - "id": "freshness", - "valueType": "string", - "key": "freshness", - "label": "freshness", - "type": "hidden" - }, - { - "id": "summary", - "valueType": "boolean", - "key": "summary", - "label": "summary", - "type": "hidden" - }, - { - "id": "include", - "valueType": "string", - "key": "include", - "label": "include", - "type": "hidden" - }, - { - "id": "exclude", - "valueType": "string", - "key": "exclude", - "label": "exclude", - "type": "hidden" - }, - { - "id": "count", - "valueType": "number", - "key": "count", - "label": "count", - "type": "hidden" - } - ] - }, - { - "nodeId": "pluginOutput", - "name": "common:core.module.template.self_output", - "intro": "workflow:intro_custom_plugin_output", - "avatar": "core/workflow/template/pluginOutput", - "flowNodeType": "pluginOutput", - "showStatus": false, - "position": { - "x": 2764.1105686698083, - "y": -30.617147285780163 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": [ - "reference" - ], - "valueType": "object", - "canEdit": true, - "key": "result", - "label": "result", - "isToolOutput": true, - "description": "", - "value": [ - "nyA6oA8mF1iW", - "httpRawResponse" - ] - } - ], - "outputs": [] - }, - { - "nodeId": "pluginConfig", - "name": "common:core.module.template.system_config", - "intro": "", - "avatar": "core/workflow/template/systemConfig", - "flowNodeType": "pluginConfig", - "position": { - "x": 184.66337662472682, - "y": -216.05298493910115 - }, - "version": "4811", - "inputs": [], - "outputs": [] - }, - { - "nodeId": "nyA6oA8mF1iW", - "name": "HTTP 请求", - "intro": "调用博查搜索API", - "avatar": "core/workflow/template/httpRequest", - "flowNodeType": "httpRequest468", - "showStatus": true, - "position": { - "x": 1335.0647252518884, - "y": -455.9043948565971 - }, - "version": "481", - "inputs": [ - { - "key": "system_addInputParam", - 
"renderTypeList": [ - "addInputParam" - ], - "valueType": "dynamic", - "label": "", - "required": false, - "description": "common:core.module.input.description.HTTP Dynamic Input", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectDataset", - "selectApp" - ], - "showDescription": false, - "showDefaultValue": true - }, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpMethod", - "renderTypeList": [ - "custom" - ], - "valueType": "string", - "label": "", - "value": "POST", - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpTimeout", - "renderTypeList": [ - "custom" - ], - "valueType": "number", - "label": "", - "value": 30, - "min": 5, - "max": 600, - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpReqUrl", - "renderTypeList": [ - "hidden" - ], - "valueType": "string", - "label": "", - "description": "common:core.module.input.description.Http Request Url", - "placeholder": "https://api.ai.com/getInventory", - "required": false, - "value": "https://api.bochaai.com/v1/web-search", - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpHeader", - "renderTypeList": [ - "custom" - ], - "valueType": "any", - "value": [ - { - "key": "Authorization", - "type": "string", - "value": "Bearer {{$pluginInput.apiKey$}}" - }, - { - "key": "Content-Type", - "type": "string", - "value": "application/json" - } - ], - "label": "", - "description": "common:core.module.input.description.Http Request Header", - "placeholder": "common:core.module.input.description.Http Request Header", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpParams", - "renderTypeList": [ - "hidden" - ], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpJsonBody", - "renderTypeList": [ - "hidden" - ], - "valueType": "any", - "value": "{\n \"query\": \"{{query}}\",\n \"freshness\": \"{{freshness}}\",\n \"summary\": {{summary}},\n \"include\": \"{{include}}\",\n \"exclude\": \"{{exclude}}\",\n \"count\": {{count}}\n}", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpFormBody", - "renderTypeList": [ - "hidden" - ], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpContentType", - "renderTypeList": [ - "hidden" - ], - "valueType": "string", - "value": "json", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "valueType": "string", - "renderTypeList": [ - "reference" - ], - "key": "query", - "label": "query", - "toolDescription": "博查搜索检索词", - "required": true, - "canEdit": true, - "editField": { - "key": true, - "description": true - }, - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "value": [ - "pluginInput", - "query" - ] - }, - { - "valueType": "string", - "renderTypeList": [ - "reference" - 
], - "key": "freshness", - "label": "freshness", - "toolDescription": "搜索时间范围", - "required": false, - "canEdit": true, - "editField": { - "key": true, - "description": true - }, - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "value": [ - "pluginInput", - "freshness" - ] - }, - { - "valueType": "boolean", - "renderTypeList": [ - "reference" - ], - "key": "summary", - "label": "summary", - "toolDescription": "是否显示文本摘要", - "required": false, - "canEdit": true, - "editField": { - "key": true, - "description": true - }, - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "value": [ - "pluginInput", - "summary" - ] - }, - { - "valueType": "string", - "renderTypeList": [ - "reference" - ], - "key": "include", - "label": "include", - "toolDescription": "指定搜索的site范围", - "required": false, - "canEdit": true, - "editField": { - "key": true, - "description": true - }, - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "value": [ - "pluginInput", - "include" - ] - }, - { - "valueType": "string", - "renderTypeList": [ - "reference" - ], - "key": "exclude", - "label": "exclude", - "toolDescription": "排除搜索的网站范围", - "required": false, - "canEdit": true, - "editField": { - "key": true, - "description": true - }, - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "value": [ - "pluginInput", - "exclude" - ] - }, - { - "valueType": "number", - "renderTypeList": [ - "reference" - ], - "key": "count", - "label": "count", - "toolDescription": "返回结果条数", - "required": false, - "canEdit": true, - "editField": { - "key": true, - "description": true - }, - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "value": [ - "pluginInput", - "count" - ] - } - ], - "outputs": [ - { - "id": "error", - "key": "error", - "label": "workflow:request_error", - "description": "HTTP请求错误信息,成功时返回空", - "valueType": "object", - "type": "static" - }, - { - "id": "httpRawResponse", - "key": "httpRawResponse", - "required": true, - "label": "workflow:raw_response", - "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", - "valueType": "any", - "type": "static" - }, - { - "id": "system_addOutputParam", - "key": 
"system_addOutputParam", - "type": "dynamic", - "valueType": "dynamic", - "label": "", - "editField": { - "key": true, - "valueType": true - } - } - ] - } + "author": "", + "name": "博查搜索", + "avatar": "core/workflow/template/bocha", + "intro": "使用博查AI搜索引擎进行网络搜索。", + "showStatus": true, + "weight": 10, + "courseUrl": "", + "isTool": true, + "templateType": "search", + "workflow": { + "nodes": [ + { + "nodeId": "pluginInput", + "name": "workflow:template.plugin_start", + "intro": "workflow:intro_plugin_input", + "avatar": "core/workflow/template/workflowStart", + "flowNodeType": "pluginInput", + "showStatus": false, + "position": { + "x": 636.3048409085379, + "y": -238.61714728578016 + }, + "version": "481", + "inputs": [ + { + "renderTypeList": ["input"], + "selectedTypeIndex": 0, + "valueType": "string", + "canEdit": true, + "key": "apiKey", + "label": "apiKey", + "description": "博查API密钥", + "defaultValue": "", + "required": true + }, + { + "renderTypeList": ["input", "reference"], + "selectedTypeIndex": 0, + "valueType": "string", + "canEdit": true, + "key": "query", + "label": "query", + "description": "搜索查询词", + "defaultValue": "", + "required": true, + "toolDescription": "搜索查询词" + }, + { + "renderTypeList": ["select", "reference"], + "selectedTypeIndex": 0, + "valueType": "string", + "canEdit": true, + "key": "freshness", + "label": "freshness", + "description": "搜索指定时间范围内的网页。", + "defaultValue": "noLimit", + "required": false, + "toolDescription": "搜索指定时间范围内的网页。", + "list": [ + { + "label": "noLimit", + "value": "noLimit" + }, + { + "label": "oneDay", + "value": "oneDay" + }, + { + "label": "oneWeek", + "value": "oneWeek" + }, + { + "label": "oneMonth", + "value": "oneMonth" + }, + { + "label": "oneYear", + "value": "oneYear" + } + ] + }, + { + "renderTypeList": ["switch", "reference"], + "selectedTypeIndex": 0, + "valueType": "boolean", + "canEdit": true, + "key": "summary", + "label": "summary", + "description": "是否显示文本摘要。", + "defaultValue": true, + "required": false, + "list": [] + }, + { + "renderTypeList": ["input", "reference"], + "selectedTypeIndex": 0, + "valueType": "string", + "canEdit": true, + "key": "include", + "label": "include", + "description": "指定搜索的site范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com", + "defaultValue": "", + "required": false, + "list": [] + }, + { + "renderTypeList": ["input", "reference"], + "selectedTypeIndex": 0, + "valueType": "string", + "canEdit": true, + "key": "exclude", + "label": "exclude", + "description": "排除搜索的网站范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com", + "defaultValue": "", + "required": false, + "list": [] + }, + { + "renderTypeList": ["numberInput", "reference"], + "selectedTypeIndex": 0, + "valueType": "number", + "canEdit": true, + "key": "count", + "label": "count", + "description": "返回结果的条数。可填范围:1-50,默认为10", + "defaultValue": 10, + "required": false, + "min": 1, + "max": 50, + "list": [] + } ], - "edges": [ - { - "source": "pluginInput", - "target": "nyA6oA8mF1iW", - "sourceHandle": "pluginInput-source-right", - "targetHandle": "nyA6oA8mF1iW-target-left" - }, - { - "source": "nyA6oA8mF1iW", - "target": "pluginOutput", - "sourceHandle": "nyA6oA8mF1iW-source-right", - "targetHandle": "pluginOutput-target-left" - } + "outputs": [ + { + "id": "apiKey", + "valueType": "string", + "key": "apiKey", + "label": "apiKey", + "type": "hidden" + }, + { + "id": "query", + "valueType": "string", + "key": "query", + "label": "query", + "type": "hidden" + }, + { + "id": "freshness", + "valueType": "string", + "key": "freshness", + "label": 
"freshness", + "type": "hidden" + }, + { + "id": "summary", + "valueType": "boolean", + "key": "summary", + "label": "summary", + "type": "hidden" + }, + { + "id": "include", + "valueType": "string", + "key": "include", + "label": "include", + "type": "hidden" + }, + { + "id": "exclude", + "valueType": "string", + "key": "exclude", + "label": "exclude", + "type": "hidden" + }, + { + "id": "count", + "valueType": "number", + "key": "count", + "label": "count", + "type": "hidden" + } ] - }, - "chatConfig": {} -} \ No newline at end of file + }, + { + "nodeId": "pluginOutput", + "name": "common:core.module.template.self_output", + "intro": "workflow:intro_custom_plugin_output", + "avatar": "core/workflow/template/pluginOutput", + "flowNodeType": "pluginOutput", + "showStatus": false, + "position": { + "x": 2289.548741109713, + "y": -113.61714728578016 + }, + "version": "481", + "inputs": [ + { + "renderTypeList": ["reference"], + "valueType": "arrayObject", + "canEdit": true, + "key": "result", + "label": "result", + "isToolOutput": true, + "description": "", + "value": [["cixTtgyy4gK43lPD", "hb4MQP8oGxPx1f9G"]], + "required": true + }, + { + "renderTypeList": ["reference"], + "valueType": "object", + "canEdit": true, + "key": "error", + "label": "error", + "isToolOutput": true, + "description": "", + "required": true, + "value": ["cixTtgyy4gK43lPD", "error"] + } + ], + "outputs": [] + }, + { + "nodeId": "pluginConfig", + "name": "common:core.module.template.system_config", + "intro": "", + "avatar": "core/workflow/template/systemConfig", + "flowNodeType": "pluginConfig", + "position": { + "x": 184.66337662472682, + "y": -216.05298493910115 + }, + "version": "4811", + "inputs": [], + "outputs": [] + }, + { + "nodeId": "cixTtgyy4gK43lPD", + "name": "HTTP 请求#2", + "intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)", + "avatar": "core/workflow/template/httpRequest", + "flowNodeType": "httpRequest468", + "showStatus": true, + "position": { + "x": 1336.2141952438083, + "y": -513.6171472857802 + }, + "inputs": [ + { + "key": "system_addInputParam", + "renderTypeList": ["addInputParam"], + "valueType": "dynamic", + "label": "", + "required": false, + "description": "接收前方节点的输出值作为变量,这些变量可以被 HTTP 请求参数使用。", + "customInputConfig": { + "selectValueTypeList": [ + "string", + "number", + "boolean", + "object", + "arrayString", + "arrayNumber", + "arrayBoolean", + "arrayObject", + "arrayAny", + "any", + "chatHistory", + "datasetQuote", + "dynamic", + "selectDataset", + "selectApp" + ], + "showDescription": false, + "showDefaultValue": true + }, + "valueDesc": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpMethod", + "renderTypeList": ["custom"], + "valueType": "string", + "label": "", + "value": "POST", + "required": true, + "valueDesc": "", + "description": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpTimeout", + "renderTypeList": ["custom"], + "valueType": "number", + "label": "", + "value": 30, + "min": 5, + "max": 600, + "required": true, + "valueDesc": "", + "description": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpReqUrl", + "renderTypeList": ["hidden"], + "valueType": "string", + "label": "", + "description": "新的 HTTP 请求地址。如果出现两个“请求地址”,可以删除该模块重新加入,会拉取最新的模块配置。", + "placeholder": "https://api.ai.com/getInventory", + "required": false, + "value": "https://api.bochaai.com/v1/web-search", + "valueDesc": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpHeader", + "renderTypeList": 
["custom"], + "valueType": "any", + "value": [ + { + "key": "Authorization", + "type": "string", + "value": "Bearer {{$pluginInput.apiKey$}}" + } + ], + "label": "", + "description": "自定义请求头,请严格填入 JSON 字符串。\n1. 确保最后一个属性没有逗号\n2. 确保 key 包含双引号\n例如:{\"Authorization\":\"Bearer xxx\"}", + "placeholder": "common:core.module.input.description.Http Request Header", + "required": false, + "valueDesc": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpParams", + "renderTypeList": ["hidden"], + "valueType": "any", + "value": [], + "label": "", + "required": false, + "valueDesc": "", + "description": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpJsonBody", + "renderTypeList": ["hidden"], + "valueType": "any", + "value": "{\n \"query\": \"{{$pluginInput.query$}}\",\n \"freshness\": \"{{$pluginInput.freshness$}}\",\n \"summary\": {{$pluginInput.summary$}},\n \"include\": \"{{$pluginInput.include$}}\",\n \"exclude\": \"{{$pluginInput.exclude$}}\",\n \"count\": {{$pluginInput.count$}}\n}", + "label": "", + "required": false, + "valueDesc": "", + "description": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpFormBody", + "renderTypeList": ["hidden"], + "valueType": "any", + "value": [], + "label": "", + "required": false, + "valueDesc": "", + "description": "", + "debugLabel": "", + "toolDescription": "" + }, + { + "key": "system_httpContentType", + "renderTypeList": ["hidden"], + "valueType": "string", + "value": "json", + "label": "", + "required": false + } + ], + "outputs": [ + { + "id": "system_addOutputParam", + "key": "system_addOutputParam", + "type": "dynamic", + "valueType": "dynamic", + "label": "输出字段提取", + "customFieldConfig": { + "selectValueTypeList": [ + "string", + "number", + "boolean", + "object", + "arrayString", + "arrayNumber", + "arrayBoolean", + "arrayObject", + "arrayAny", + "any", + "chatHistory", + "datasetQuote", + "dynamic", + "selectDataset", + "selectApp" + ], + "showDescription": false, + "showDefaultValue": false + }, + "description": "可以通过 JSONPath 语法来提取响应值中的指定字段", + "valueDesc": "" + }, + { + "id": "error", + "key": "error", + "label": "请求错误", + "description": "HTTP请求错误信息,成功时返回空", + "valueType": "object", + "type": "static", + "valueDesc": "" + }, + { + "id": "httpRawResponse", + "key": "httpRawResponse", + "required": true, + "label": "原始响应", + "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", + "valueType": "any", + "type": "static", + "valueDesc": "" + }, + { + "id": "hb4MQP8oGxPx1f9G", + "valueType": "arrayObject", + "type": "dynamic", + "key": "$.data.webPages.value", + "label": "$.data.webPages.value" + } + ] + } + ], + "edges": [ + { + "source": "pluginInput", + "target": "cixTtgyy4gK43lPD", + "sourceHandle": "pluginInput-source-right", + "targetHandle": "cixTtgyy4gK43lPD-target-left" + }, + { + "source": "cixTtgyy4gK43lPD", + "target": "pluginOutput", + "sourceHandle": "cixTtgyy4gK43lPD-source-right", + "targetHandle": "pluginOutput-target-left" + } + ], + "chatConfig": { + "variables": [], + "_id": "67bd78aeebcd3c6700e82e5e", + "questionGuide": { + "open": false, + "model": "gpt-4o-mini", + "customPrompt": "" + }, + "ttsConfig": { + "type": "web" + }, + "whisperConfig": { + "open": false, + "autoSend": false, + "autoTTSResponse": false + }, + "chatInputGuide": { + "open": false, + "textList": [], + "customUrl": "" + }, + "instruction": "", + "autoExecute": { + "open": false, + "defaultPrompt": "" + }, + "welcomeText": "" + } + }, + "chatConfig": {} +} diff --git 
a/packages/service/common/api/type.d.ts b/packages/service/common/api/type.d.ts index 2a9af9e2b..09f596e00 100644 --- a/packages/service/common/api/type.d.ts +++ b/packages/service/common/api/type.d.ts @@ -1,5 +1,8 @@ -import type { ApiDatasetDetailResponse } from '@fastgpt/global/core/dataset/apiDataset'; -import { FeishuServer, YuqueServer } from '@fastgpt/global/core/dataset/apiDataset'; +import type { + ApiDatasetDetailResponse, + FeishuServer, + YuqueServer +} from '@fastgpt/global/core/dataset/apiDataset/type'; import type { DeepRagSearchProps, SearchDatasetDataResponse diff --git a/packages/service/common/buffer/rawText/controller.ts b/packages/service/common/buffer/rawText/controller.ts index 59370d033..8750494a3 100644 --- a/packages/service/common/buffer/rawText/controller.ts +++ b/packages/service/common/buffer/rawText/controller.ts @@ -142,23 +142,26 @@ export const updateRawTextBufferExpiredTime = async ({ }; export const clearExpiredRawTextBufferCron = async () => { + const gridBucket = getGridBucket(); + const clearExpiredRawTextBuffer = async () => { addLog.debug('Clear expired raw text buffer start'); - const gridBucket = getGridBucket(); - return retryFn(async () => { - const data = await MongoRawTextBufferSchema.find( - { - 'metadata.expiredTime': { $lt: new Date() } - }, - '_id' - ).lean(); + const data = await MongoRawTextBufferSchema.find( + { + 'metadata.expiredTime': { $lt: new Date() } + }, + '_id' + ).lean(); - for (const item of data) { + for (const item of data) { + try { await gridBucket.delete(item._id); + } catch (error) { + addLog.error('Delete expired raw text buffer error', error); } - addLog.debug('Clear expired raw text buffer end'); - }); + } + addLog.debug('Clear expired raw text buffer end'); }; setCron('*/10 * * * *', async () => { diff --git a/packages/service/common/file/gridfs/controller.ts b/packages/service/common/file/gridfs/controller.ts index 05708ed20..b3a694b76 100644 --- a/packages/service/common/file/gridfs/controller.ts +++ b/packages/service/common/file/gridfs/controller.ts @@ -7,12 +7,13 @@ import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema'; import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; import { readRawContentByFileBuffer } from '../read/utils'; -import { gridFsStream2Buffer, stream2Encoding } from './utils'; +import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils'; import { addLog } from '../../system/log'; import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools'; import { Readable } from 'stream'; import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller'; import { addMinutes } from 'date-fns'; +import { retryFn } from '@fastgpt/global/common/system/utils'; export function getGFSCollection(bucket: `${BucketNameEnum}`) { MongoDatasetFileSchema; @@ -64,23 +65,7 @@ export async function uploadFile({ // create a gridfs bucket const bucket = getGridBucket(bucketName); - const fileSize = stats.size; - // 单块大小:尽可能大,但不超过 14MB,不小于512KB - const chunkSizeBytes = (() => { - // 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB - const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024); - - // 确保块大小至少为512KB - const minChunkSize = 512 * 1024; // 512KB - - // 取理想块大小和最小块大小中的较大值 - let chunkSize = Math.max(idealChunkSize, minChunkSize); - - // 将块大小向上取整到最接近的64KB的倍数,使其更整齐 - chunkSize = Math.ceil(chunkSize / (64 * 1024)) * 
(64 * 1024); - - return chunkSize; - })(); + const chunkSizeBytes = computeGridFsChunSize(stats.size); const stream = bucket.openUploadStream(filename, { metadata, @@ -173,24 +158,18 @@ export async function getFileById({ export async function delFileByFileIdList({ bucketName, - fileIdList, - retry = 3 + fileIdList }: { bucketName: `${BucketNameEnum}`; fileIdList: string[]; - retry?: number; }): Promise { - try { + return retryFn(async () => { const bucket = getGridBucket(bucketName); for await (const fileId of fileIdList) { await bucket.delete(new Types.ObjectId(fileId)); } - } catch (error) { - if (retry > 0) { - return delFileByFileIdList({ bucketName, fileIdList, retry: retry - 1 }); - } - } + }); } export async function getDownloadStream({ diff --git a/packages/service/common/file/gridfs/utils.ts b/packages/service/common/file/gridfs/utils.ts index c743b7136..4c72fb61d 100644 --- a/packages/service/common/file/gridfs/utils.ts +++ b/packages/service/common/file/gridfs/utils.ts @@ -105,3 +105,20 @@ export const stream2Encoding = async (stream: NodeJS.ReadableStream) => { stream: copyStream }; }; + +// 单块大小:尽可能大,但不超过 14MB,不小于512KB +export const computeGridFsChunSize = (fileSize: number) => { + // 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB + const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024); + + // 确保块大小至少为512KB + const minChunkSize = 512 * 1024; // 512KB + + // 取理想块大小和最小块大小中的较大值 + let chunkSize = Math.max(idealChunkSize, minChunkSize); + + // 将块大小向上取整到最接近的64KB的倍数,使其更整齐 + chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024); + + return chunkSize; +}; diff --git a/packages/service/common/file/multer.ts b/packages/service/common/file/multer.ts index f2c159085..235a61df9 100644 --- a/packages/service/common/file/multer.ts +++ b/packages/service/common/file/multer.ts @@ -22,7 +22,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => { maxSize *= 1024 * 1024; class UploadModel { - uploader = multer({ + uploaderSingle = multer({ limits: { fieldSize: maxSize }, @@ -41,8 +41,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => { } }) }).single('file'); - - async doUpload( + async getUploadFile( req: NextApiRequest, res: NextApiResponse, originBucketName?: `${BucketNameEnum}` @@ -54,7 +53,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => { bucketName?: `${BucketNameEnum}`; }>((resolve, reject) => { // @ts-ignore - this.uploader(req, res, (error) => { + this.uploaderSingle(req, res, (error) => { if (error) { return reject(error); } @@ -94,6 +93,58 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => { }); }); } + + uploaderMultiple = multer({ + limits: { + fieldSize: maxSize + }, + preservePath: true, + storage: multer.diskStorage({ + // destination: (_req, _file, cb) => { + // cb(null, tmpFileDirPath); + // }, + filename: (req, file, cb) => { + if (!file?.originalname) { + cb(new Error('File not found'), ''); + } else { + const { ext } = path.parse(decodeURIComponent(file.originalname)); + cb(null, `${getNanoid()}${ext}`); + } + } + }) + }).array('file', global.feConfigs?.uploadFileMaxSize); + async getUploadFiles(req: NextApiRequest, res: NextApiResponse) { + return new Promise<{ + files: FileType[]; + data: T; + }>((resolve, reject) => { + // @ts-ignore + this.uploaderMultiple(req, res, (error) => { + if (error) { + console.log(error); + return reject(error); + } + + // @ts-ignore + const files = req.files as FileType[]; + + resolve({ + files: 
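
The inline chunk-size calculation is now the shared `computeGridFsChunSize` helper: aim for roughly ten chunks per file, clamp to the 512 KB floor and 14 MB ceiling, then round up to a 64 KB multiple. A quick sanity check of the arithmetic (a sketch; the expected values follow directly from the function above):

```ts
import assert from 'node:assert';
import { computeGridFsChunSize } from './utils'; // the helper extracted above

// 1 MiB file: ceil(1 MiB / 10) = 104_858 B, below the 512 KiB floor -> 512 KiB
assert.strictEqual(computeGridFsChunSize(1024 * 1024), 512 * 1024);
// 100 MiB file: ceil(100 MiB / 10) = 10 MiB, already a 64 KiB multiple -> 10 MiB
assert.strictEqual(computeGridFsChunSize(100 * 1024 * 1024), 10 * 1024 * 1024);
// 1 GiB file: ideal chunk ~102.4 MiB, capped at the 14 MiB ceiling
assert.strictEqual(computeGridFsChunSize(1024 * 1024 * 1024), 14 * 1024 * 1024);
```
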
files.map((file) => ({ + ...file, + originalname: decodeURIComponent(file.originalname) + })), + data: (() => { + if (!req.body?.data) return {}; + try { + return JSON.parse(req.body.data); + } catch (error) { + return {}; + } + })() + }); + }); + }); + } } return new UploadModel(); diff --git a/packages/service/common/system/frequencyLimit/utils.ts b/packages/service/common/system/frequencyLimit/utils.ts index 50cb7c88b..e62cf1679 100644 --- a/packages/service/common/system/frequencyLimit/utils.ts +++ b/packages/service/common/system/frequencyLimit/utils.ts @@ -4,7 +4,8 @@ import { MongoFrequencyLimit } from './schema'; export const authFrequencyLimit = async ({ eventId, maxAmount, - expiredTime + expiredTime, + num = 1 }: AuthFrequencyLimitProps) => { try { // 对应 eventId 的 account+1, 不存在的话,则创建一个 @@ -14,7 +15,7 @@ export const authFrequencyLimit = async ({ expiredTime: { $gte: new Date() } }, { - $inc: { amount: 1 }, + $inc: { amount: num }, // If not exist, set the expiredTime $setOnInsert: { expiredTime } }, diff --git a/packages/service/common/system/timerLock/constants.ts b/packages/service/common/system/timerLock/constants.ts index 010711257..76189686c 100644 --- a/packages/service/common/system/timerLock/constants.ts +++ b/packages/service/common/system/timerLock/constants.ts @@ -6,7 +6,9 @@ export enum TimerIdEnum { updateStandardPlan = 'updateStandardPlan', scheduleTriggerApp = 'scheduleTriggerApp', notification = 'notification', - clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer' + + clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer', + clearExpiredDatasetImage = 'clearExpiredDatasetImage' } export enum LockNotificationEnum { diff --git a/packages/service/core/ai/config/provider/Qwen.json b/packages/service/core/ai/config/provider/Qwen.json index 84aea9ae3..b617f619a 100644 --- a/packages/service/core/ai/config/provider/Qwen.json +++ b/packages/service/core/ai/config/provider/Qwen.json @@ -548,12 +548,27 @@ "showTopP": false, "showStopSign": false }, + { + "model": "text-embedding-v4", + "name": "text-embedding-v4", + "defaultToken": 512, + "maxToken": 8000, + "type": "embedding", + "defaultConfig": { + "dimensions": 1536 + } + }, { "model": "text-embedding-v3", "name": "text-embedding-v3", "defaultToken": 512, "maxToken": 8000, "type": "embedding" + }, + { + "model": "gte-rerank-v2", + "name": "gte-rerank-v2", + "type": "rerank" } ] } diff --git a/packages/service/core/ai/model.ts b/packages/service/core/ai/model.ts index 2c53498c5..55a4f9747 100644 --- a/packages/service/core/ai/model.ts +++ b/packages/service/core/ai/model.ts @@ -20,6 +20,10 @@ export const getVlmModel = (model?: string) => { ?.find((item) => item.model === model || item.name === model); }; +export const getVlmModelList = () => { + return Array.from(global.llmModelMap.values())?.filter((item) => item.vision) || []; +}; + export const getDefaultEmbeddingModel = () => global?.systemDefaultModel.embedding!; export const getEmbeddingModel = (model?: string) => { if (!model) return getDefaultEmbeddingModel(); diff --git a/packages/service/core/ai/utils.ts b/packages/service/core/ai/utils.ts index 24866578c..e9a0e7f30 100644 --- a/packages/service/core/ai/utils.ts +++ b/packages/service/core/ai/utils.ts @@ -227,8 +227,10 @@ export const parseReasoningContent = (text: string): [string, string] => { export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => { return retainDatasetCite - ? 
text.replace(/\[id\]\(CITE\)/g, '') - : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '').replace(/\[id\]\(CITE\)/g, ''); + ? text.replace(/[\[【]id[\]】]\(CITE\)/g, '') + : text + .replace(/[\[【]([a-f0-9]{24})[\]】](?:\([^\)]*\)?)?/g, '') + .replace(/[\[【]id[\]】]\(CITE\)/g, ''); }; // Parse llm stream part @@ -426,8 +428,8 @@ export const parseLLMStreamResponse = () => { } // 新内容包含 [,初始化缓冲数据 - if (text.includes('[')) { - const index = text.indexOf('['); + if (text.includes('[') || text.includes('【')) { + const index = text.indexOf('[') !== -1 ? text.indexOf('[') : text.indexOf('【'); const beforeContent = citeBuffer + text.slice(0, index); citeBuffer = text.slice(index); diff --git a/packages/service/core/dataset/apiDataset/api.ts b/packages/service/core/dataset/apiDataset/custom/api.ts similarity index 87% rename from packages/service/core/dataset/apiDataset/api.ts rename to packages/service/core/dataset/apiDataset/custom/api.ts index e9441db43..d0a9810a7 100644 --- a/packages/service/core/dataset/apiDataset/api.ts +++ b/packages/service/core/dataset/apiDataset/custom/api.ts @@ -3,14 +3,15 @@ import type { ApiFileReadContentResponse, APIFileReadResponse, ApiDatasetDetailResponse, - APIFileServer, - APIFileItem -} from '@fastgpt/global/core/dataset/apiDataset'; + APIFileServer +} from '@fastgpt/global/core/dataset/apiDataset/type'; import axios, { type Method } from 'axios'; -import { addLog } from '../../../common/system/log'; -import { readFileRawTextByUrl } from '../read'; +import { addLog } from '../../../../common/system/log'; +import { readFileRawTextByUrl } from '../../read'; import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type'; import { type RequireOnlyOne } from '@fastgpt/global/common/type/utils'; +import { addRawTextBuffer, getRawTextBuffer } from '../../../../common/buffer/rawText/controller'; +import { addMinutes } from 'date-fns'; type ResponseDataType = { success: boolean; @@ -141,6 +142,15 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer } }; } if (previewUrl) { + // Get from buffer + const buffer = await getRawTextBuffer(previewUrl); + if (buffer) { + return { + title, + rawText: buffer.text + }; + } + const rawText = await readFileRawTextByUrl({ teamId, tmbId, @@ -149,6 +159,14 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer } customPdfParse, getFormatText: true }); + + await addRawTextBuffer({ + sourceId: previewUrl, + sourceName: title || '', + text: rawText, + expiredTime: addMinutes(new Date(), 30) + }); + return { title, rawText diff --git a/packages/service/core/dataset/feishuDataset/api.ts b/packages/service/core/dataset/apiDataset/feishuDataset/api.ts similarity index 97% rename from packages/service/core/dataset/feishuDataset/api.ts rename to packages/service/core/dataset/apiDataset/feishuDataset/api.ts index df0d58700..61146e663 100644 --- a/packages/service/core/dataset/feishuDataset/api.ts +++ b/packages/service/core/dataset/apiDataset/feishuDataset/api.ts @@ -3,10 +3,10 @@ import type { ApiFileReadContentResponse, ApiDatasetDetailResponse, FeishuServer -} from '@fastgpt/global/core/dataset/apiDataset'; +} from '@fastgpt/global/core/dataset/apiDataset/type'; import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type'; import axios, { type Method } from 'axios'; -import { addLog } from '../../../common/system/log'; +import { addLog } from '../../../../common/system/log'; type ResponseDataType = { success: boolean; diff --git 
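
`removeDatasetCiteText` now strips citations written with full-width brackets 【】 as well as ASCII [], and the stream parser buffers on either opening bracket, since Chinese-tuned models frequently emit full-width punctuation. A small behavioral sketch (the 24-hex id is a dummy):

```ts
import { removeDatasetCiteText } from './utils'; // the function above

const text =
  'A[65f2a1b2c3d4e5f6a7b8c9d0](quote) and B【65f2a1b2c3d4e5f6a7b8c9d0】(quote)';

// retainDatasetCite = false: both bracket styles are removed entirely.
removeDatasetCiteText(text, false); // -> 'A and B'

// retainDatasetCite = true: only the literal [id](CITE) / 【id】(CITE)
// placeholders are dropped; real-id citations survive for the client to render.
removeDatasetCiteText('ok [id](CITE)', true); // -> 'ok '
```
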
a/packages/service/core/dataset/apiDataset/index.ts b/packages/service/core/dataset/apiDataset/index.ts index 79af379c4..46c5e87e0 100644 --- a/packages/service/core/dataset/apiDataset/index.ts +++ b/packages/service/core/dataset/apiDataset/index.ts @@ -1,18 +1,10 @@ -import type { - APIFileServer, - YuqueServer, - FeishuServer -} from '@fastgpt/global/core/dataset/apiDataset'; -import { useApiDatasetRequest } from './api'; -import { useYuqueDatasetRequest } from '../yuqueDataset/api'; -import { useFeishuDatasetRequest } from '../feishuDataset/api'; +import { useApiDatasetRequest } from './custom/api'; +import { useYuqueDatasetRequest } from './yuqueDataset/api'; +import { useFeishuDatasetRequest } from './feishuDataset/api'; +import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type'; -export const getApiDatasetRequest = async (data: { - apiServer?: APIFileServer; - yuqueServer?: YuqueServer; - feishuServer?: FeishuServer; -}) => { - const { apiServer, yuqueServer, feishuServer } = data; +export const getApiDatasetRequest = async (apiDatasetServer?: ApiDatasetServerType) => { + const { apiServer, yuqueServer, feishuServer } = apiDatasetServer || {}; if (apiServer) { return useApiDatasetRequest({ apiServer }); diff --git a/packages/service/core/dataset/yuqueDataset/api.ts b/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts similarity index 98% rename from packages/service/core/dataset/yuqueDataset/api.ts rename to packages/service/core/dataset/apiDataset/yuqueDataset/api.ts index cafbd9700..c0e76de1f 100644 --- a/packages/service/core/dataset/yuqueDataset/api.ts +++ b/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts @@ -3,9 +3,9 @@ import type { ApiFileReadContentResponse, YuqueServer, ApiDatasetDetailResponse -} from '@fastgpt/global/core/dataset/apiDataset'; +} from '@fastgpt/global/core/dataset/apiDataset/type'; import axios, { type Method } from 'axios'; -import { addLog } from '../../../common/system/log'; +import { addLog } from '../../../../common/system/log'; import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type'; type ResponseDataType = { diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts index c734e8896..646c5ab4e 100644 --- a/packages/service/core/dataset/collection/controller.ts +++ b/packages/service/core/dataset/collection/controller.ts @@ -5,9 +5,9 @@ import { } from '@fastgpt/global/core/dataset/constants'; import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d'; import { MongoDatasetCollection } from './schema'; -import { - type DatasetCollectionSchemaType, - type DatasetSchemaType +import type { + DatasetCollectionSchemaType, + DatasetSchemaType } from '@fastgpt/global/core/dataset/type'; import { MongoDatasetTraining } from '../training/schema'; import { MongoDatasetData } from '../data/schema'; @@ -15,7 +15,7 @@ import { delImgByRelatedId } from '../../../common/file/image/controller'; import { deleteDatasetDataVector } from '../../../common/vectorDB/controller'; import { delFileByFileIdList } from '../../../common/file/gridfs/controller'; import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; -import { type ClientSession } from '../../../common/mongo'; +import type { ClientSession } from '../../../common/mongo'; import { createOrGetCollectionTags } from './utils'; import { rawText2Chunks } from '../read'; import { checkDatasetLimit } from 
'../../../support/permission/teamLimit'; @@ -24,7 +24,7 @@ import { mongoSessionRun } from '../../../common/mongo/sessionRun'; import { createTrainingUsage } from '../../../support/wallet/usage/controller'; import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants'; import { getLLMModel, getEmbeddingModel, getVlmModel } from '../../ai/model'; -import { pushDataListToTrainingQueue } from '../training/controller'; +import { pushDataListToTrainingQueue, pushDatasetToParseQueue } from '../training/controller'; import { MongoImage } from '../../../common/file/image/schema'; import { hashStr } from '@fastgpt/global/common/string/tools'; import { addDays } from 'date-fns'; @@ -35,23 +35,28 @@ import { computeChunkSize, computeChunkSplitter, computeParagraphChunkDeep, + getAutoIndexSize, getLLMMaxChunkSize } from '@fastgpt/global/core/dataset/training/utils'; import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants'; +import { clearCollectionImages, removeDatasetImageExpiredTime } from '../image/utils'; export const createCollectionAndInsertData = async ({ dataset, rawText, relatedId, + imageIds, createCollectionParams, backupParse = false, billId, session }: { dataset: DatasetSchemaType; - rawText: string; + rawText?: string; relatedId?: string; + imageIds?: string[]; createCollectionParams: CreateOneCollectionParams; + backupParse?: boolean; billId?: string; @@ -69,13 +74,13 @@ export const createCollectionAndInsertData = async ({ // Set default params const trainingType = createCollectionParams.trainingType || DatasetCollectionDataProcessModeEnum.chunk; - const chunkSize = computeChunkSize({ - ...createCollectionParams, - trainingType, - llmModel: getLLMModel(dataset.agentModel) - }); const chunkSplitter = computeChunkSplitter(createCollectionParams); const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams); + const trainingMode = getTrainingModeByCollection({ + trainingType: trainingType, + autoIndexes: createCollectionParams.autoIndexes, + imageIndex: createCollectionParams.imageIndex + }); if ( trainingType === DatasetCollectionDataProcessModeEnum.qa || @@ -90,44 +95,85 @@ export const createCollectionAndInsertData = async ({ delete createCollectionParams.qaPrompt; } - // 1. split chunks - const chunks = rawText2Chunks({ - rawText, - chunkTriggerType: createCollectionParams.chunkTriggerType, - chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize, + // 1. split chunks or create image chunks + const { + chunks, chunkSize, - paragraphChunkDeep, - paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize, - maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)), - overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0, - customReg: chunkSplitter ? 
[chunkSplitter] : [], - backupParse - }); + indexSize + }: { + chunks: Array<{ + q?: string; + a?: string; // answer or custom content + imageId?: string; + indexes?: string[]; + }>; + chunkSize?: number; + indexSize?: number; + } = (() => { + if (rawText) { + const chunkSize = computeChunkSize({ + ...createCollectionParams, + trainingType, + llmModel: getLLMModel(dataset.agentModel) + }); + // Process text chunks + const chunks = rawText2Chunks({ + rawText, + chunkTriggerType: createCollectionParams.chunkTriggerType, + chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize, + chunkSize, + paragraphChunkDeep, + paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize, + maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)), + overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0, + customReg: chunkSplitter ? [chunkSplitter] : [], + backupParse + }); + return { + chunks, + chunkSize, + indexSize: createCollectionParams.indexSize ?? getAutoIndexSize(dataset.vectorModel) + }; + } + + if (imageIds) { + // Process image chunks + const chunks = imageIds.map((imageId: string) => ({ + imageId, + indexes: [] + })); + return { chunks }; + } + + return { + chunks: [], + chunkSize: computeChunkSize({ + ...createCollectionParams, + trainingType, + llmModel: getLLMModel(dataset.agentModel) + }), + indexSize: createCollectionParams.indexSize ?? getAutoIndexSize(dataset.vectorModel) + }; + })(); // 2. auth limit await checkDatasetLimit({ teamId, - insertLen: predictDataLimitLength( - getTrainingModeByCollection({ - trainingType: trainingType, - autoIndexes: createCollectionParams.autoIndexes, - imageIndex: createCollectionParams.imageIndex - }), - chunks - ) + insertLen: predictDataLimitLength(trainingMode, chunks) }); const fn = async (session: ClientSession) => { - // 3. create collection + // 3. Create collection const { _id: collectionId } = await createOneCollection({ ...createCollectionParams, trainingType, paragraphChunkDeep, chunkSize, chunkSplitter, + indexSize, - hashRawText: hashStr(rawText), - rawTextLength: rawText.length, + hashRawText: rawText ? hashStr(rawText) : undefined, + rawTextLength: rawText?.length, nextSyncTime: (() => { // ignore auto collections sync for website datasets if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined; @@ -160,34 +206,51 @@ export const createCollectionAndInsertData = async ({ })(); // 5. insert to training queue - const insertResults = await pushDataListToTrainingQueue({ - teamId, - tmbId, - datasetId: dataset._id, + const insertResults = await (async () => { + if (rawText || imageIds) { + return pushDataListToTrainingQueue({ + teamId, + tmbId, + datasetId: dataset._id, + collectionId, + agentModel: dataset.agentModel, + vectorModel: dataset.vectorModel, + vlmModel: dataset.vlmModel, + indexSize, + mode: trainingMode, + prompt: createCollectionParams.qaPrompt, + billId: traingBillId, + data: chunks.map((item, index) => ({ + ...item, + indexes: item.indexes?.map((text) => ({ + type: DatasetDataIndexTypeEnum.custom, + text + })), + chunkIndex: index + })), + session + }); + } else { + await pushDatasetToParseQueue({ + teamId, + tmbId, + datasetId: dataset._id, + collectionId, + billId: traingBillId, + session + }); + return { + insertLen: 0 + }; + } + })(); + + // 6. 
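
`createCollectionAndInsertData` now accepts either raw text or a list of pre-uploaded image ids, and only falls back to the parse queue when neither is present. The dispatch, paraphrased from the hunk above (this classifier is illustrative, not an actual export):

```ts
type CollectionSource =
  | { kind: 'text'; rawText: string }      // -> rawText2Chunks + pushDataListToTrainingQueue
  | { kind: 'images'; imageIds: string[] } // -> one { imageId, indexes: [] } chunk per image
  | { kind: 'deferred' };                  // -> pushDatasetToParseQueue, insertLen reported as 0

function classifyCollectionSource(rawText?: string, imageIds?: string[]): CollectionSource {
  if (rawText) return { kind: 'text', rawText };
  if (imageIds) return { kind: 'images', imageIds };
  return { kind: 'deferred' }; // parsing happens later in the training worker
}
```
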
Remove images ttl index + await removeDatasetImageExpiredTime({ + ids: imageIds, collectionId, - agentModel: dataset.agentModel, - vectorModel: dataset.vectorModel, - vlmModel: dataset.vlmModel, - indexSize: createCollectionParams.indexSize, - mode: getTrainingModeByCollection({ - trainingType: trainingType, - autoIndexes: createCollectionParams.autoIndexes, - imageIndex: createCollectionParams.imageIndex - }), - prompt: createCollectionParams.qaPrompt, - billId: traingBillId, - data: chunks.map((item, index) => ({ - ...item, - indexes: item.indexes?.map((text) => ({ - type: DatasetDataIndexTypeEnum.custom, - text - })), - chunkIndex: index - })), session }); - - // 6. remove related image ttl if (relatedId) { await MongoImage.updateMany( { @@ -207,7 +270,7 @@ export const createCollectionAndInsertData = async ({ } return { - collectionId, + collectionId: String(collectionId), insertResults }; }; @@ -244,9 +307,9 @@ export async function createOneCollection({ session, ...props }: CreateOneCollec [ { ...props, - teamId, + _id: undefined, + parentId: parentId || null, - datasetId, tags: collectionTags, @@ -288,17 +351,20 @@ export const delCollectionRelatedSource = async ({ .map((item) => item?.metadata?.relatedImgId || '') .filter(Boolean); - // Delete files - await delFileByFileIdList({ - bucketName: BucketNameEnum.dataset, - fileIdList - }); - // Delete images - await delImgByRelatedId({ - teamId, - relateIds: relatedImageIds, - session - }); + // Delete files and images in parallel + await Promise.all([ + // Delete files + delFileByFileIdList({ + bucketName: BucketNameEnum.dataset, + fileIdList + }), + // Delete images + delImgByRelatedId({ + teamId, + relateIds: relatedImageIds, + session + }) + ]); }; /** * delete collection and it related data @@ -343,6 +409,9 @@ export async function delCollection({ datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }), + // Delete dataset_images + clearCollectionImages(collectionIds), + // Delete images if needed ...(delImg ? [ delImgByRelatedId({ @@ -353,6 +422,7 @@ export async function delCollection({ }) ] : []), + // Delete files if needed ...(delFile ? 
[ delFileByFileIdList({ diff --git a/packages/service/core/dataset/collection/utils.ts b/packages/service/core/dataset/collection/utils.ts index 96310ecb3..1dac1882c 100644 --- a/packages/service/core/dataset/collection/utils.ts +++ b/packages/service/core/dataset/collection/utils.ts @@ -1,11 +1,9 @@ import { MongoDatasetCollection } from './schema'; -import { type ClientSession } from '../../../common/mongo'; +import type { ClientSession } from '../../../common/mongo'; import { MongoDatasetCollectionTags } from '../tag/schema'; import { readFromSecondary } from '../../../common/mongo/utils'; -import { - type CollectionWithDatasetType, - type DatasetCollectionSchemaType -} from '@fastgpt/global/core/dataset/type'; +import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type'; +import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type'; import { DatasetCollectionDataProcessModeEnum, DatasetCollectionSyncResultEnum, @@ -159,9 +157,7 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => { return { type: DatasetSourceReadTypeEnum.apiFile, sourceId, - apiServer: dataset.apiServer, - feishuServer: dataset.feishuServer, - yuqueServer: dataset.yuqueServer + apiDatasetServer: dataset.apiDatasetServer }; })(); @@ -196,31 +192,8 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => { dataset, rawText: rawText, createCollectionParams: { - teamId: collection.teamId, - tmbId: collection.tmbId, + ...collection, name: title || collection.name, - datasetId: collection.datasetId, - parentId: collection.parentId, - type: collection.type, - - trainingType: collection.trainingType, - chunkSize: collection.chunkSize, - chunkSplitter: collection.chunkSplitter, - qaPrompt: collection.qaPrompt, - - fileId: collection.fileId, - rawLink: collection.rawLink, - externalFileId: collection.externalFileId, - externalFileUrl: collection.externalFileUrl, - apiFileId: collection.apiFileId, - - hashRawText, - rawTextLength: rawText.length, - - metadata: collection.metadata, - - tags: collection.tags, - createTime: collection.createTime, updateTime: new Date() } }); @@ -233,18 +206,37 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => { QA: 独立进程 Chunk: Image Index -> Auto index -> chunk index */ -export const getTrainingModeByCollection = (collection: { - trainingType: DatasetCollectionSchemaType['trainingType']; - autoIndexes?: DatasetCollectionSchemaType['autoIndexes']; - imageIndex?: DatasetCollectionSchemaType['imageIndex']; +export const getTrainingModeByCollection = ({ + trainingType, + autoIndexes, + imageIndex +}: { + trainingType: DatasetCollectionDataProcessModeEnum; + autoIndexes?: boolean; + imageIndex?: boolean; }) => { - if (collection.trainingType === DatasetCollectionDataProcessModeEnum.qa) { + if ( + trainingType === DatasetCollectionDataProcessModeEnum.imageParse && + global.feConfigs?.isPlus + ) { + return TrainingModeEnum.imageParse; + } + + if (trainingType === DatasetCollectionDataProcessModeEnum.qa) { return TrainingModeEnum.qa; } - if (collection.imageIndex && global.feConfigs?.isPlus) { + if ( + trainingType === DatasetCollectionDataProcessModeEnum.chunk && + imageIndex && + global.feConfigs?.isPlus + ) { return TrainingModeEnum.image; } - if (collection.autoIndexes && global.feConfigs?.isPlus) { + if ( + trainingType === DatasetCollectionDataProcessModeEnum.chunk && + autoIndexes && + global.feConfigs?.isPlus + ) { return TrainingModeEnum.auto; } return 
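
The rewritten `getTrainingModeByCollection` resolves the mode strictly by precedence: `imageParse` (commercial builds only), then `qa`, then chunk-with-`imageIndex` (commercial), then chunk-with-`autoIndexes` (commercial), and finally plain `chunk`. A usage sketch — the enum import path is assumed from the surrounding code:

```ts
import { getTrainingModeByCollection } from './utils'; // the helper above
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';

// Without global.feConfigs.isPlus, imageIndex/autoIndexes are ignored and the
// chunk fallback wins:
getTrainingModeByCollection({
  trainingType: DatasetCollectionDataProcessModeEnum.chunk,
  imageIndex: true,
  autoIndexes: true
}); // -> TrainingModeEnum.chunk (TrainingModeEnum.image on a plus build)
```
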
TrainingModeEnum.chunk; diff --git a/packages/service/core/dataset/controller.ts b/packages/service/core/dataset/controller.ts index 1ae7a8d58..c8ab49910 100644 --- a/packages/service/core/dataset/controller.ts +++ b/packages/service/core/dataset/controller.ts @@ -9,6 +9,7 @@ import { deleteDatasetDataVector } from '../../common/vectorDB/controller'; import { MongoDatasetDataText } from './data/dataTextSchema'; import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset'; import { retryFn } from '@fastgpt/global/common/system/utils'; +import { clearDatasetImages } from './image/utils'; /* ============= dataset ========== */ /* find all datasetId by top datasetId */ @@ -102,8 +103,10 @@ export async function delDatasetRelevantData({ }), //delete dataset_datas MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }), - // Delete Image and file + // Delete collection image and file delCollectionRelatedSource({ collections }), + // Delete dataset Image + clearDatasetImages(datasetIds), // Delete vector data deleteDatasetDataVector({ teamId, datasetIds }) ]); diff --git a/packages/service/core/dataset/data/controller.ts b/packages/service/core/dataset/data/controller.ts new file mode 100644 index 000000000..b1872318e --- /dev/null +++ b/packages/service/core/dataset/data/controller.ts @@ -0,0 +1,56 @@ +import { getDatasetImagePreviewUrl } from '../image/utils'; +import type { DatasetCiteItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type'; + +export const formatDatasetDataValue = ({ + q, + a, + imageId, + teamId, + datasetId +}: { + q: string; + a?: string; + imageId?: string; + teamId: string; + datasetId: string; +}): { + q: string; + a?: string; + imagePreivewUrl?: string; +} => { + if (!imageId) { + return { + q, + a + }; + } + + const previewUrl = getDatasetImagePreviewUrl({ + imageId, + teamId, + datasetId, + expiredMinutes: 60 * 24 * 7 // 7 days + }); + + return { + q: `![${q.replaceAll('\n', '\\n')}](${previewUrl})`, + a, + imagePreivewUrl: previewUrl + }; +}; + +export const getFormatDatasetCiteList = (list: DatasetDataSchemaType[]) => { + return list.map((item) => ({ + _id: item._id, + ...formatDatasetDataValue({ + teamId: item.teamId, + datasetId: item.datasetId, + q: item.q, + a: item.a, + imageId: item.imageId + }), + history: item.history, + updateTime: item.updateTime, + index: item.chunkIndex + })); +}; diff --git a/packages/service/core/dataset/data/schema.ts b/packages/service/core/dataset/data/schema.ts index 0f5cefa5d..5b8d07e94 100644 --- a/packages/service/core/dataset/data/schema.ts +++ b/packages/service/core/dataset/data/schema.ts @@ -37,8 +37,7 @@ const DatasetDataSchema = new Schema({ required: true }, a: { - type: String, - default: '' + type: String }, history: { type: [ @@ -74,6 +73,9 @@ const DatasetDataSchema = new Schema({ default: [] }, + imageId: { + type: String + }, updateTime: { type: Date, default: () => new Date() diff --git a/packages/service/core/dataset/image/controller.ts b/packages/service/core/dataset/image/controller.ts new file mode 100644 index 000000000..0d3a44b08 --- /dev/null +++ b/packages/service/core/dataset/image/controller.ts @@ -0,0 +1,166 @@ +import { addMinutes } from 'date-fns'; +import { bucketName, MongoDatasetImageSchema } from './schema'; +import { connectionMongo, Types } from '../../../common/mongo'; +import fs from 'fs'; +import type { FileType } from '../../../common/file/multer'; +import fsp from 'fs/promises'; +import { computeGridFsChunSize } from 
'../../../common/file/gridfs/utils'; +import { setCron } from '../../../common/system/cron'; +import { checkTimerLock } from '../../../common/system/timerLock/utils'; +import { TimerIdEnum } from '../../../common/system/timerLock/constants'; +import { addLog } from '../../../common/system/log'; + +const getGridBucket = () => { + return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, { + bucketName: bucketName + }); +}; + +export const createDatasetImage = async ({ + teamId, + datasetId, + file, + expiredTime = addMinutes(new Date(), 30) +}: { + teamId: string; + datasetId: string; + file: FileType; + expiredTime?: Date; +}): Promise<{ imageId: string; previewUrl: string }> => { + const path = file.path; + const gridBucket = getGridBucket(); + const metadata = { + teamId: String(teamId), + datasetId: String(datasetId), + expiredTime + }; + + const stats = await fsp.stat(path); + if (!stats.isFile()) return Promise.reject(`${path} is not a file`); + + const readStream = fs.createReadStream(path, { + highWaterMark: 256 * 1024 + }); + const chunkSizeBytes = computeGridFsChunSize(stats.size); + + const stream = gridBucket.openUploadStream(file.originalname, { + metadata, + contentType: file.mimetype, + chunkSizeBytes + }); + + // save to gridfs + await new Promise((resolve, reject) => { + readStream + .pipe(stream as any) + .on('finish', resolve) + .on('error', reject); + }); + + return { + imageId: String(stream.id), + previewUrl: '' + }; +}; + +export const getDatasetImageReadData = async (imageId: string) => { + // Get file metadata to get contentType + const fileInfo = await MongoDatasetImageSchema.findOne({ + _id: new Types.ObjectId(imageId) + }).lean(); + if (!fileInfo) { + return Promise.reject('Image not found'); + } + + const gridBucket = getGridBucket(); + return { + stream: gridBucket.openDownloadStream(new Types.ObjectId(imageId)), + fileInfo + }; +}; +export const getDatasetImageBase64 = async (imageId: string) => { + // Get file metadata to get contentType + const fileInfo = await MongoDatasetImageSchema.findOne({ + _id: new Types.ObjectId(imageId) + }).lean(); + if (!fileInfo) { + return Promise.reject('Image not found'); + } + + // Get image stream from GridFS + const { stream } = await getDatasetImageReadData(imageId); + + // Convert stream to buffer + const chunks: Buffer[] = []; + + return new Promise((resolve, reject) => { + stream.on('data', (chunk: Buffer) => { + chunks.push(chunk); + }); + + stream.on('end', () => { + // Combine all chunks into a single buffer + const buffer = Buffer.concat(chunks); + // Convert buffer to base64 string + const base64 = buffer.toString('base64'); + const dataUrl = `data:${fileInfo.contentType || 'image/jpeg'};base64,${base64}`; + resolve(dataUrl); + }); + + stream.on('error', reject); + }); +}; + +export const deleteDatasetImage = async (imageId: string) => { + const gridBucket = getGridBucket(); + + try { + await gridBucket.delete(new Types.ObjectId(imageId)); + } catch (error: any) { + const msg = error?.message; + if (msg.includes('File not found')) { + addLog.warn('Delete dataset image error', error); + return; + } else { + return Promise.reject(error); + } + } +}; + +export const clearExpiredDatasetImageCron = async () => { + const gridBucket = getGridBucket(); + const clearExpiredDatasetImages = async () => { + addLog.debug('Clear expired dataset image start'); + + const data = await MongoDatasetImageSchema.find( + { + 'metadata.expiredTime': { $lt: new Date() } + }, + '_id' + ).lean(); + + for (const item of 
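
`createDatasetImage` writes the upload into a dedicated `dataset_image` GridFS bucket and stamps `metadata.expiredTime` (default now + 30 minutes), so images that are never attached to a collection get swept by the cleanup cron. A lifecycle sketch, assuming the multer `FileType` shape from earlier in the diff:

```ts
import { createDatasetImage } from './controller'; // the module above
import type { FileType } from '../../../common/file/multer';

// Upload now, bind later: until a collection claims the image via
// removeDatasetImageExpiredTime (next hunk), the expiry marker keeps it
// eligible for clearExpiredDatasetImageCron.
async function uploadTempDatasetImage(teamId: string, datasetId: string, file: FileType) {
  const { imageId } = await createDatasetImage({ teamId, datasetId, file });
  return imageId; // later passed as `imageIds` to createCollectionAndInsertData
}
```
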
data) { + try { + await gridBucket.delete(item._id); + } catch (error) { + addLog.error('Delete expired dataset image error', error); + } + } + addLog.debug('Clear expired dataset image end'); + }; + + setCron('*/10 * * * *', async () => { + if ( + await checkTimerLock({ + timerId: TimerIdEnum.clearExpiredDatasetImage, + lockMinuted: 9 + }) + ) { + try { + await clearExpiredDatasetImages(); + } catch (error) { + addLog.error('clearExpiredDatasetImageCron error', error); + } + } + }); +}; diff --git a/packages/service/core/dataset/image/schema.ts b/packages/service/core/dataset/image/schema.ts new file mode 100644 index 000000000..674c76891 --- /dev/null +++ b/packages/service/core/dataset/image/schema.ts @@ -0,0 +1,36 @@ +import type { Types } from '../../../common/mongo'; +import { getMongoModel, Schema } from '../../../common/mongo'; + +export const bucketName = 'dataset_image'; + +const MongoDatasetImage = new Schema({ + length: { type: Number, required: true }, + chunkSize: { type: Number, required: true }, + uploadDate: { type: Date, required: true }, + filename: { type: String, required: true }, + contentType: { type: String, required: true }, + metadata: { + teamId: { type: String, required: true }, + datasetId: { type: String, required: true }, + collectionId: { type: String }, + expiredTime: { type: Date, required: true } + } +}); +MongoDatasetImage.index({ 'metadata.datasetId': 'hashed' }); +MongoDatasetImage.index({ 'metadata.collectionId': 'hashed' }); +MongoDatasetImage.index({ 'metadata.expiredTime': -1 }); + +export const MongoDatasetImageSchema = getMongoModel<{ + _id: Types.ObjectId; + length: number; + chunkSize: number; + uploadDate: Date; + filename: string; + contentType: string; + metadata: { + teamId: string; + datasetId: string; + collectionId: string; + expiredTime: Date; + }; +}>(`${bucketName}.files`, MongoDatasetImage); diff --git a/packages/service/core/dataset/image/utils.ts b/packages/service/core/dataset/image/utils.ts new file mode 100644 index 000000000..eed4a0b82 --- /dev/null +++ b/packages/service/core/dataset/image/utils.ts @@ -0,0 +1,103 @@ +import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode'; +import { Types, type ClientSession } from '../../../common/mongo'; +import { deleteDatasetImage } from './controller'; +import { MongoDatasetImageSchema } from './schema'; +import { addMinutes } from 'date-fns'; +import jwt from 'jsonwebtoken'; + +export const removeDatasetImageExpiredTime = async ({ + ids = [], + collectionId, + session +}: { + ids?: string[]; + collectionId: string; + session?: ClientSession; +}) => { + if (ids.length === 0) return; + return MongoDatasetImageSchema.updateMany( + { + _id: { + $in: ids + .filter((id) => Types.ObjectId.isValid(id)) + .map((id) => (typeof id === 'string' ? new Types.ObjectId(id) : id)) + } + }, + { + $unset: { 'metadata.expiredTime': '' }, + $set: { + 'metadata.collectionId': String(collectionId) + } + }, + { session } + ); +}; + +export const getDatasetImagePreviewUrl = ({ + imageId, + teamId, + datasetId, + expiredMinutes +}: { + imageId: string; + teamId: string; + datasetId: string; + expiredMinutes: number; +}) => { + const expiredTime = Math.floor(addMinutes(new Date(), expiredMinutes).getTime() / 1000); + + const key = (process.env.FILE_TOKEN_KEY as string) ?? 
'filetoken'; + const token = jwt.sign( + { + teamId: String(teamId), + datasetId: String(datasetId), + exp: expiredTime + }, + key + ); + + return `/api/core/dataset/image/${imageId}?token=${token}`; +}; +export const authDatasetImagePreviewUrl = (token?: string) => + new Promise<{ + teamId: string; + datasetId: string; + }>((resolve, reject) => { + if (!token) { + return reject(ERROR_ENUM.unAuthFile); + } + const key = (process.env.FILE_TOKEN_KEY as string) ?? 'filetoken'; + + jwt.verify(token, key, (err, decoded: any) => { + if (err || !decoded?.teamId || !decoded?.datasetId) { + reject(ERROR_ENUM.unAuthFile); + return; + } + resolve({ + teamId: decoded.teamId, + datasetId: decoded.datasetId + }); + }); + }); + +export const clearDatasetImages = async (datasetIds: string[]) => { + if (datasetIds.length === 0) return; + const images = await MongoDatasetImageSchema.find( + { + 'metadata.datasetId': { $in: datasetIds.map((item) => String(item)) } + }, + '_id' + ).lean(); + await Promise.all(images.map((image) => deleteDatasetImage(String(image._id)))); +}; + +export const clearCollectionImages = async (collectionIds: string[]) => { + if (collectionIds.length === 0) return; + const images = await MongoDatasetImageSchema.find( + { + 'metadata.collectionId': { $in: collectionIds.map((item) => String(item)) } + }, + '_id' + ).lean(); + await Promise.all(images.map((image) => deleteDatasetImage(String(image._id)))); +}; diff --git a/packages/service/core/dataset/read.ts b/packages/service/core/dataset/read.ts index 647c05758..20b34e3bc 100644 --- a/packages/service/core/dataset/read.ts +++ b/packages/service/core/dataset/read.ts @@ -9,13 +9,9 @@ import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/st import axios from 'axios'; import { readRawContentByFileBuffer } from '../../common/file/read/utils'; import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools'; -import { - type APIFileServer, - type FeishuServer, - type YuqueServer -} from '@fastgpt/global/core/dataset/apiDataset'; import { getApiDatasetRequest } from './apiDataset'; import Papa from 'papaparse'; +import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type'; export const readFileRawTextByUrl = async ({ teamId, @@ -69,9 +65,7 @@ export const readDatasetSourceRawText = async ({ sourceId, selector, externalFileId, - apiServer, - feishuServer, - yuqueServer, + apiDatasetServer, customPdfParse, getFormatText }: { @@ -84,9 +78,7 @@ export const readDatasetSourceRawText = async ({ selector?: string; // link selector externalFileId?: string; // external file dataset - apiServer?: APIFileServer; // api dataset - feishuServer?: FeishuServer; // feishu dataset - yuqueServer?: YuqueServer; // yuque dataset + apiDatasetServer?: ApiDatasetServerType; // api dataset }): Promise<{ title?: string; rawText: string; @@ -110,9 +102,14 @@ export const readDatasetSourceRawText = async ({ selector }); + const { title = sourceId, content = '' } = result[0]; + if (!content || content === 'Cannot fetch internal url') { + return Promise.reject(content || 'Can not fetch content from link'); + } + return { - title: result[0]?.title, - rawText: result[0]?.content || '' + title, + rawText: content }; } else if (type === DatasetSourceReadTypeEnum.externalFile) { if (!externalFileId) return Promise.reject('FileId not found'); @@ -128,9 +125,7 @@ export const readDatasetSourceRawText = async ({ }; } else if (type === DatasetSourceReadTypeEnum.apiFile) { const { title, rawText } = await 
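
The preview URL embeds a JWT signed with `FILE_TOKEN_KEY` (falling back to `'filetoken'`) that carries only `teamId`, `datasetId`, and an expiry; notably it does not bind the specific `imageId`, so the image route presumably checks the image's own metadata against the decoded claims. A round-trip sketch of the token:

```ts
import jwt from 'jsonwebtoken';

const key = process.env.FILE_TOKEN_KEY ?? 'filetoken'; // same fallback as above
const token = jwt.sign(
  { teamId: 't1', datasetId: 'd1', exp: Math.floor(Date.now() / 1000) + 60 * 60 },
  key
);

// authDatasetImagePreviewUrl does the inverse: verify, then reject with
// ERROR_ENUM.unAuthFile unless both claims are present.
const decoded = jwt.verify(token, key) as { teamId: string; datasetId: string };
```
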
readApiServerFileContent({ - apiServer, - feishuServer, - yuqueServer, + apiDatasetServer, apiFileId: sourceId, teamId, tmbId @@ -147,17 +142,13 @@ export const readDatasetSourceRawText = async ({ }; export const readApiServerFileContent = async ({ - apiServer, - feishuServer, - yuqueServer, + apiDatasetServer, apiFileId, teamId, tmbId, customPdfParse }: { - apiServer?: APIFileServer; - feishuServer?: FeishuServer; - yuqueServer?: YuqueServer; + apiDatasetServer?: ApiDatasetServerType; apiFileId: string; teamId: string; tmbId: string; @@ -166,13 +157,7 @@ export const readApiServerFileContent = async ({ title?: string; rawText: string; }> => { - return ( - await getApiDatasetRequest({ - apiServer, - yuqueServer, - feishuServer - }) - ).getFileContent({ + return (await getApiDatasetRequest(apiDatasetServer)).getFileContent({ teamId, tmbId, apiFileId, @@ -186,9 +171,11 @@ export const rawText2Chunks = ({ chunkTriggerMinSize = 1000, backupParse, chunkSize = 512, + imageIdList, ...splitProps }: { rawText: string; + imageIdList?: string[]; chunkTriggerType?: ChunkTriggerConfigTypeEnum; chunkTriggerMinSize?: number; // maxSize from agent model, not store @@ -199,17 +186,18 @@ export const rawText2Chunks = ({ q: string; a: string; indexes?: string[]; + imageIdList?: string[]; }[] => { const parseDatasetBackup2Chunks = (rawText: string) => { const csvArr = Papa.parse(rawText).data as string[][]; - console.log(rawText, csvArr); const chunks = csvArr .slice(1) .map((item) => ({ q: item[0] || '', a: item[1] || '', - indexes: item.slice(2) + indexes: item.slice(2), + imageIdList })) .filter((item) => item.q || item.a); @@ -231,7 +219,8 @@ export const rawText2Chunks = ({ return [ { q: rawText, - a: '' + a: '', + imageIdList } ]; } @@ -240,7 +229,7 @@ export const rawText2Chunks = ({ if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) { const textLength = rawText.trim().length; if (textLength < chunkTriggerMinSize) { - return [{ q: rawText, a: '' }]; + return [{ q: rawText, a: '', imageIdList }]; } } @@ -253,6 +242,7 @@ export const rawText2Chunks = ({ return chunks.map((item) => ({ q: item, a: '', - indexes: [] + indexes: [], + imageIdList })); }; diff --git a/packages/service/core/dataset/schema.ts b/packages/service/core/dataset/schema.ts index 1573acb66..c865dcb92 100644 --- a/packages/service/core/dataset/schema.ts +++ b/packages/service/core/dataset/schema.ts @@ -127,14 +127,16 @@ const DatasetSchema = new Schema({ type: Boolean, default: true }, - apiServer: Object, - feishuServer: Object, - yuqueServer: Object, + + apiDatasetServer: Object, // abandoned autoSync: Boolean, externalReadUrl: String, - defaultPermission: Number + defaultPermission: Number, + apiServer: Object, + feishuServer: Object, + yuqueServer: Object }); try { diff --git a/packages/service/core/dataset/search/controller.ts b/packages/service/core/dataset/search/controller.ts index 65861299e..ffd94eca4 100644 --- a/packages/service/core/dataset/search/controller.ts +++ b/packages/service/core/dataset/search/controller.ts @@ -28,6 +28,7 @@ import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants'; import { datasetSearchQueryExtension } from './utils'; import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d'; import { addLog } from '../../../common/system/log'; +import { formatDatasetDataValue } from '../data/controller'; export type SearchDatasetDataProps = { histories: ChatItemType[]; @@ -175,6 +176,12 @@ export async function searchDatasetData( collectionFilterMatch } = 
props; + // Constants data + const datasetDataSelectField = + '_id datasetId collectionId updateTime q a imageId chunkIndex indexes'; + const datsaetCollectionSelectField = + '_id name fileId rawLink apiFileId externalFileId externalFileUrl'; + /* init params */ searchMode = DatasetSearchModeMap[searchMode] ? searchMode : DatasetSearchModeEnum.embedding; usingReRank = usingReRank && !!getDefaultRerankModel(); @@ -463,14 +470,14 @@ export async function searchDatasetData( collectionId: { $in: collectionIdList }, 'indexes.dataId': { $in: results.map((item) => item.id?.trim()) } }, - '_id datasetId collectionId updateTime q a chunkIndex indexes', + datasetDataSelectField, { ...readFromSecondary } ).lean(), MongoDatasetCollection.find( { _id: { $in: collectionIdList } }, - '_id name fileId rawLink apiFileId externalFileId externalFileUrl', + datsaetCollectionSelectField, { ...readFromSecondary } ).lean() ]); @@ -494,8 +501,13 @@ export async function searchDatasetData( const result: SearchDataResponseItemType = { id: String(data._id), updateTime: data.updateTime, - q: data.q, - a: data.a, + ...formatDatasetDataValue({ + teamId, + datasetId: data.datasetId, + q: data.q, + a: data.a, + imageId: data.imageId + }), chunkIndex: data.chunkIndex, datasetId: String(data.datasetId), collectionId: String(data.collectionId), @@ -597,14 +609,14 @@ export async function searchDatasetData( { _id: { $in: searchResults.map((item) => item.dataId) } }, - '_id datasetId collectionId updateTime q a chunkIndex indexes', + datasetDataSelectField, { ...readFromSecondary } ).lean(), MongoDatasetCollection.find( { _id: { $in: searchResults.map((item) => item.collectionId) } }, - '_id name fileId rawLink apiFileId externalFileId externalFileUrl', + datsaetCollectionSelectField, { ...readFromSecondary } ).lean() ]); @@ -630,8 +642,13 @@ export async function searchDatasetData( datasetId: String(data.datasetId), collectionId: String(data.collectionId), updateTime: data.updateTime, - q: data.q, - a: data.a, + ...formatDatasetDataValue({ + teamId, + datasetId: data.datasetId, + q: data.q, + a: data.a, + imageId: data.imageId + }), chunkIndex: data.chunkIndex, indexes: data.indexes, ...getCollectionSourceData(collection), diff --git a/packages/service/core/dataset/training/controller.ts b/packages/service/core/dataset/training/controller.ts index 30708cbe0..990cfa427 100644 --- a/packages/service/core/dataset/training/controller.ts +++ b/packages/service/core/dataset/training/controller.ts @@ -12,10 +12,7 @@ import { getCollectionWithDataset } from '../controller'; import { mongoSessionRun } from '../../../common/mongo/sessionRun'; import { type PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type'; import { i18nT } from '../../../../web/i18n/utils'; -import { - getLLMDefaultChunkSize, - getLLMMaxChunkSize -} from '../../../../global/core/dataset/training/utils'; +import { getLLMMaxChunkSize } from '../../../../global/core/dataset/training/utils'; export const lockTrainingDataByTeamId = async (teamId: string): Promise => { try { @@ -62,10 +59,10 @@ export async function pushDataListToTrainingQueue({ indexSize, session }: PushDataToTrainingQueueProps): Promise { - const getImageChunkMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => { + const formatTrainingMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => { if (mode !== TrainingModeEnum.image) return mode; // 检查内容中,是否包含 ![](xxx) 的图片格式 - const text = data.q + data.a || ''; + const text = (data.q || '') + (data.a || 
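
With `imageId` now part of the search projection, every hit passes through `formatDatasetDataValue`, which rewrites image-backed records as a markdown image whose alt text is the newline-escaped `q` and whose URL is a signed 7-day preview link. A sketch of the output shape (dummy ids; URL abbreviated):

```ts
import { formatDatasetDataValue } from '../data/controller'; // the helper above

formatDatasetDataValue({
  teamId: 't1',
  datasetId: 'd1',
  q: 'A photo\nof a cat',
  imageId: '65f2a1b2c3d4e5f6a7b8c9d0'
});
// -> {
//   q: '![A photo\\nof a cat](/api/core/dataset/image/65f2...?token=...)',
//   a: undefined,
//   imagePreivewUrl: '/api/core/dataset/image/65f2...?token=...' // (sic)
// }
// Text-only records (no imageId) pass through unchanged.
```
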
''); const regex = /!\[\]\((.*?)\)/g; const match = text.match(regex); if (match) { @@ -82,9 +79,6 @@ export async function pushDataListToTrainingQueue({ if (!agentModelData) { return Promise.reject(i18nT('common:error_llm_not_config')); } - if (mode === TrainingModeEnum.chunk || mode === TrainingModeEnum.auto) { - prompt = undefined; - } const { model, maxToken, weight } = await (async () => { if (mode === TrainingModeEnum.chunk) { @@ -101,7 +95,7 @@ export async function pushDataListToTrainingQueue({ weight: 0 }; } - if (mode === TrainingModeEnum.image) { + if (mode === TrainingModeEnum.image || mode === TrainingModeEnum.imageParse) { const vllmModelData = getVlmModel(vlmModel); if (!vllmModelData) { return Promise.reject(i18nT('common:error_vlm_not_config')); @@ -116,17 +110,8 @@ export async function pushDataListToTrainingQueue({ return Promise.reject(`Training mode "${mode}" is inValid`); })(); - // filter repeat or equal content - const set = new Set(); - const filterResult: Record = { - success: [], - overToken: [], - repeat: [], - error: [] - }; - // format q and a, remove empty char - data.forEach((item) => { + data = data.filter((item) => { item.q = simpleText(item.q); item.a = simpleText(item.a); @@ -140,8 +125,7 @@ export async function pushDataListToTrainingQueue({ .filter(Boolean); // filter repeat content - if (!item.q) { - filterResult.error.push(item); + if (!item.imageId && !item.q) { return; } @@ -149,42 +133,36 @@ export async function pushDataListToTrainingQueue({ // Oversize llm tokens if (text.length > maxToken) { - filterResult.overToken.push(item); return; } - if (set.has(text)) { - filterResult.repeat.push(item); - } else { - filterResult.success.push(item); - set.add(text); - } + return true; }); // insert data to db - const insertLen = filterResult.success.length; - const failedDocuments: PushDatasetDataChunkProps[] = []; + const insertLen = data.length; // 使用 insertMany 批量插入 - const batchSize = 200; + const batchSize = 500; const insertData = async (startIndex: number, session: ClientSession) => { - const list = filterResult.success.slice(startIndex, startIndex + batchSize); + const list = data.slice(startIndex, startIndex + batchSize); if (list.length === 0) return; try { - await MongoDatasetTraining.insertMany( + const result = await MongoDatasetTraining.insertMany( list.map((item) => ({ teamId, tmbId, - datasetId, - collectionId, + datasetId: datasetId, + collectionId: collectionId, billId, - mode: getImageChunkMode(item, mode), + mode: formatTrainingMode(item, mode), prompt, model, - q: item.q, - a: item.a, + ...(item.q && { q: item.q }), + ...(item.a && { a: item.a }), + ...(item.imageId && { imageId: item.imageId }), chunkIndex: item.chunkIndex ?? 0, indexSize, weight: weight ?? 
0, @@ -193,21 +171,20 @@ export async function pushDataListToTrainingQueue({ })), { session, - ordered: true + ordered: false, + rawResult: true, + includeResultMetadata: false // 进一步减少返回数据 } ); + + if (result.insertedCount !== list.length) { + return Promise.reject(`Insert data error, ${JSON.stringify(result)}`); + } } catch (error: any) { addLog.error(`Insert error`, error); - // 如果有错误,将失败的文档添加到失败列表中 - error.writeErrors?.forEach((writeError: any) => { - failedDocuments.push(data[writeError.index]); - }); - console.log('failed', failedDocuments); + return Promise.reject(error); } - // 对于失败的文档,尝试单独插入 - await MongoDatasetTraining.create(failedDocuments, { session }); - return insertData(startIndex + batchSize, session); }; @@ -219,10 +196,37 @@ export async function pushDataListToTrainingQueue({ }); } - delete filterResult.success; - return { - insertLen, - ...filterResult + insertLen }; } + +export const pushDatasetToParseQueue = async ({ + teamId, + tmbId, + datasetId, + collectionId, + billId, + session +}: { + teamId: string; + tmbId: string; + datasetId: string; + collectionId: string; + billId: string; + session: ClientSession; +}) => { + await MongoDatasetTraining.create( + [ + { + teamId, + tmbId, + datasetId, + collectionId, + billId, + mode: TrainingModeEnum.parse + } + ], + { session, ordered: true } + ); +}; diff --git a/packages/service/core/dataset/training/schema.ts b/packages/service/core/dataset/training/schema.ts index 44f4e22db..eaf85ef3d 100644 --- a/packages/service/core/dataset/training/schema.ts +++ b/packages/service/core/dataset/training/schema.ts @@ -54,16 +54,8 @@ const TrainingDataSchema = new Schema({ default: 5 }, - model: { - // ai model - type: String, - required: true - }, - prompt: { - // qa split prompt - type: String, - default: '' - }, + model: String, + prompt: String, q: { type: String, default: '' @@ -72,6 +64,7 @@ const TrainingDataSchema = new Schema({ type: String, default: '' }, + imageId: String, chunkIndex: { type: Number, default: 0 @@ -81,9 +74,7 @@ const TrainingDataSchema = new Schema({ type: Number, default: 0 }, - dataId: { - type: Schema.Types.ObjectId - }, + dataId: Schema.Types.ObjectId, indexes: { type: [ { diff --git a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts index 0aebe99ce..cd4a54e3d 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/promptCall.ts @@ -302,18 +302,21 @@ export const runToolWithPromptCall = async ( const reasoningContent: string = aiResponse.choices?.[0]?.message?.reasoning_content || ''; const usage = aiResponse.usage; + const formatReasonContent = removeDatasetCiteText(reasoningContent, retainDatasetCite); + const formatContent = removeDatasetCiteText(content, retainDatasetCite); + // API already parse reasoning content - if (reasoningContent || !aiChatReasoning) { + if (formatReasonContent || !aiChatReasoning) { return { - answer: content, - reasoning: reasoningContent, + answer: formatContent, + reasoning: formatReasonContent, finish_reason, inputTokens: usage?.prompt_tokens, outputTokens: usage?.completion_tokens }; } - const [think, answer] = parseReasoningContent(content); + const [think, answer] = parseReasoningContent(formatContent); return { answer, reasoning: think, @@ -328,7 +331,7 @@ export const runToolWithPromptCall = async ( workflowStreamResponse?.({ event: SseResponseEventEnum.fastAnswer, data: textAdaptGptResponse({ 
- reasoning_content: removeDatasetCiteText(reasoning, retainDatasetCite) + reasoning_content: reasoning }) }); } diff --git a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts index 71ab13e7d..8e157b252 100644 --- a/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts +++ b/packages/service/core/workflow/dispatch/agent/runTool/toolChoice.ts @@ -356,11 +356,14 @@ export const runToolWithToolChoice = async ( const reasoningContent = result.choices?.[0]?.message?.reasoning_content || ''; const usage = result.usage; + const formatReasoningContent = removeDatasetCiteText(reasoningContent, retainDatasetCite); + const formatAnswer = removeDatasetCiteText(answer, retainDatasetCite); + if (aiChatReasoning && reasoningContent) { workflowStreamResponse?.({ event: SseResponseEventEnum.fastAnswer, data: textAdaptGptResponse({ - reasoning_content: removeDatasetCiteText(reasoningContent, retainDatasetCite) + reasoning_content: formatReasoningContent }) }); } @@ -395,14 +398,14 @@ export const runToolWithToolChoice = async ( workflowStreamResponse?.({ event: SseResponseEventEnum.fastAnswer, data: textAdaptGptResponse({ - text: removeDatasetCiteText(answer, retainDatasetCite) + text: formatAnswer }) }); } return { - reasoningContent: (reasoningContent as string) || '', - answer, + reasoningContent: formatReasoningContent, + answer: formatAnswer, toolCalls: toolCalls, finish_reason, inputTokens: usage?.prompt_tokens, diff --git a/packages/service/core/workflow/dispatch/chat/oneapi.ts b/packages/service/core/workflow/dispatch/chat/oneapi.ts index 84bd9ab77..c68646be8 100644 --- a/packages/service/core/workflow/dispatch/chat/oneapi.ts +++ b/packages/service/core/workflow/dispatch/chat/oneapi.ts @@ -263,12 +263,15 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise { - const max = Number(global.systemEnv?.tokenWorkers || 30); + const max = Math.min(Number(global.systemEnv?.tokenWorkers || 30), 100); const workerController = getWorkerController({ name: WorkerNameEnum.countGptMessagesTokens, maxReservedThreads: max diff --git a/packages/web/components/common/Icon/constants.ts b/packages/web/components/common/Icon/constants.ts index 586420fe6..d0c53defa 100644 --- a/packages/web/components/common/Icon/constants.ts +++ b/packages/web/components/common/Icon/constants.ts @@ -220,9 +220,11 @@ export const iconPaths = { import('./icons/core/dataset/feishuDatasetOutline.svg'), 'core/dataset/fileCollection': () => import('./icons/core/dataset/fileCollection.svg'), 'core/dataset/fullTextRecall': () => import('./icons/core/dataset/fullTextRecall.svg'), + 'core/dataset/imageFill': () => import('./icons/core/dataset/imageFill.svg'), 'core/dataset/manualCollection': () => import('./icons/core/dataset/manualCollection.svg'), 'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'), 'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'), + 'core/dataset/otherDataset': () => import('./icons/core/dataset/otherDataset.svg'), 'core/dataset/questionExtension': () => import('./icons/core/dataset/questionExtension.svg'), 'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'), 'core/dataset/searchfilter': () => import('./icons/core/dataset/searchfilter.svg'), @@ -230,7 +232,6 @@ export const iconPaths = { 'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'), 'core/dataset/tag': () => 
import('./icons/core/dataset/tag.svg'), 'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'), - 'core/dataset/otherDataset': () => import('./icons/core/dataset/otherDataset.svg'), 'core/dataset/websiteDatasetColor': () => import('./icons/core/dataset/websiteDatasetColor.svg'), 'core/dataset/websiteDatasetOutline': () => import('./icons/core/dataset/websiteDatasetOutline.svg'), @@ -379,10 +380,12 @@ export const iconPaths = { fullScreen: () => import('./icons/fullScreen.svg'), help: () => import('./icons/help.svg'), history: () => import('./icons/history.svg'), + image: () => import('./icons/image.svg'), infoRounded: () => import('./icons/infoRounded.svg'), kbTest: () => import('./icons/kbTest.svg'), key: () => import('./icons/key.svg'), keyPrimary: () => import('./icons/keyPrimary.svg'), + loading: () => import('./icons/loading.svg'), menu: () => import('./icons/menu.svg'), minus: () => import('./icons/minus.svg'), 'modal/AddClb': () => import('./icons/modal/AddClb.svg'), diff --git a/packages/web/components/common/Icon/icons/core/app/simpleMode/template.svg b/packages/web/components/common/Icon/icons/core/app/simpleMode/template.svg index d47abbdbd..3a3ed1295 100644 --- a/packages/web/components/common/Icon/icons/core/app/simpleMode/template.svg +++ b/packages/web/components/common/Icon/icons/core/app/simpleMode/template.svg @@ -1,4 +1,4 @@ - + diff --git a/packages/web/components/common/Icon/icons/core/dataset/imageFill.svg b/packages/web/components/common/Icon/icons/core/dataset/imageFill.svg new file mode 100644 index 000000000..421c7c49a --- /dev/null +++ b/packages/web/components/common/Icon/icons/core/dataset/imageFill.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/packages/web/components/common/Icon/icons/core/workflow/template/bocha.svg b/packages/web/components/common/Icon/icons/core/workflow/template/bocha.svg index 071333021..b24c2b3e6 100644 --- a/packages/web/components/common/Icon/icons/core/workflow/template/bocha.svg +++ b/packages/web/components/common/Icon/icons/core/workflow/template/bocha.svg @@ -1,4 +1,4 @@ - + diff --git a/packages/web/components/common/Icon/icons/image.svg b/packages/web/components/common/Icon/icons/image.svg new file mode 100644 index 000000000..94b529725 --- /dev/null +++ b/packages/web/components/common/Icon/icons/image.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/packages/web/components/common/Icon/icons/loading.svg b/packages/web/components/common/Icon/icons/loading.svg new file mode 100644 index 000000000..10033653f --- /dev/null +++ b/packages/web/components/common/Icon/icons/loading.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/packages/web/components/common/MyMenu/Multiple.tsx b/packages/web/components/common/MyMenu/Multiple.tsx new file mode 100644 index 000000000..60b7b0a31 --- /dev/null +++ b/packages/web/components/common/MyMenu/Multiple.tsx @@ -0,0 +1,331 @@ +import React, { useMemo, useRef, useState } from 'react'; +import { + Box, + Flex, + type MenuItemProps, + type PlacementWithLogical, + type AvatarProps, + type BoxProps, + type DividerProps +} from '@chakra-ui/react'; +import MyDivider from '../MyDivider'; +import type { IconNameType } from '../Icon/type'; +import { useSystem } from '../../../hooks/useSystem'; +import Avatar from '../Avatar'; +import MyPopover from '../MyPopover'; + +export type MenuItemType = 'primary' | 'danger' | 'gray' | 'grayBg'; + +export type MenuSizeType = 'sm' | 'md' | 'xs' | 'mini'; + +export type 
MenuItemData = { + label?: string; + children: Array<{ + isActive?: boolean; + type?: MenuItemType; + icon?: IconNameType | string; + label: string | React.ReactNode; + description?: string; + onClick?: () => any; + menuItemStyles?: MenuItemProps; + menuList?: MenuItemData[]; + }>; +}; + +export type Props = { + label?: string; + width?: number | string; + offset?: [number, number]; + Trigger: React.ReactNode; + trigger?: 'hover' | 'click'; + size?: MenuSizeType; + placement?: PlacementWithLogical; + hasArrow?: boolean; + onClose?: () => void; + menuList: MenuItemData[]; +}; + +const typeMapStyle: Record = { + primary: { + styles: { + _hover: { + backgroundColor: 'primary.50', + color: 'primary.600' + }, + _focus: { + backgroundColor: 'primary.50', + color: 'primary.600' + }, + _active: { + backgroundColor: 'primary.50', + color: 'primary.600' + } + }, + iconColor: 'myGray.600' + }, + gray: { + styles: { + _hover: { + backgroundColor: 'myGray.05', + color: 'primary.600' + }, + _focus: { + backgroundColor: 'myGray.05', + color: 'primary.600' + }, + _active: { + backgroundColor: 'myGray.05', + color: 'primary.600' + } + }, + iconColor: 'myGray.400' + }, + grayBg: { + styles: { + _hover: { + backgroundColor: 'myGray.05', + color: 'primary.600' + }, + _focus: { + backgroundColor: 'myGray.05', + color: 'primary.600' + }, + _active: { + backgroundColor: 'myGray.05', + color: 'primary.600' + } + }, + iconColor: 'myGray.600' + }, + danger: { + styles: { + color: 'red.600', + _hover: { + background: 'red.1' + }, + _focus: { + background: 'red.1' + }, + _active: { + background: 'red.1' + } + }, + iconColor: 'red.600' + } +}; +const sizeMapStyle: Record< + MenuSizeType, + { + iconStyle: AvatarProps; + labelStyle: BoxProps; + dividerStyle: DividerProps; + menuItemStyle: MenuItemProps; + } +> = { + mini: { + iconStyle: { + w: '14px' + }, + labelStyle: { + fontSize: 'mini' + }, + dividerStyle: { + my: 0.5 + }, + menuItemStyle: { + py: 1.5, + px: 2 + } + }, + xs: { + iconStyle: { + w: '14px' + }, + labelStyle: { + fontSize: 'sm' + }, + dividerStyle: { + my: 0.5 + }, + menuItemStyle: { + py: 1.5, + px: 2 + } + }, + sm: { + iconStyle: { + w: '1rem' + }, + labelStyle: { + fontSize: 'sm' + }, + dividerStyle: { + my: 1 + }, + menuItemStyle: { + py: 2, + px: 3, + _notLast: { + mb: 0.5 + } + } + }, + md: { + iconStyle: { + w: '2rem', + borderRadius: '6px' + }, + labelStyle: { + fontSize: 'sm' + }, + dividerStyle: { + my: 1 + }, + menuItemStyle: { + py: 2, + px: 3, + _notLast: { + mb: 0.5 + } + } + } +}; + +const MenuItem = ({ + item, + size, + onClose +}: { + item: MenuItemData['children'][number]; + size: MenuSizeType; + onClose: () => void; +}) => { + return ( + { + if (item.onClick) { + item.onClick(); + } + if (!item.menuList) { + onClose(); + } + }} + > + + {!!item.icon && ( + + )} + + + {item.label} + + {item.description && ( + + {item.description} + + )} + + + + ); +}; + +const MultipleMenu = (props: Props) => { + const { + width = 'auto', + trigger = 'hover', + size = 'sm', + offset, + Trigger, + menuList, + hasArrow = false, + placement = 'bottom-start' + } = props; + + const { isPc } = useSystem(); + const formatTrigger = !isPc ? 'click' : trigger; + + return ( + + {({ onClose }) => { + const onCloseFn = () => { + onClose(); + props?.onClose?.(); + }; + + return ( + + {menuList.map((group, i) => ( + + {i !== 0 && } + {group.label && ( + + {group.label} + + )} + {group.children.map((item, index) => { + return ( + + {item.menuList ? 
( + + + + } + hasArrow + /> + ) : ( + + )} + + ); + })} + + ))} + + ); + }} + + ); +}; + +export default React.memo(MultipleMenu); diff --git a/packages/web/components/common/MyMenu/index.tsx b/packages/web/components/common/MyMenu/index.tsx index 49e07cace..4f67dcca0 100644 --- a/packages/web/components/common/MyMenu/index.tsx +++ b/packages/web/components/common/MyMenu/index.tsx @@ -1,4 +1,4 @@ -import React, { useMemo, useRef, useState } from 'react'; +import React, { useCallback, useMemo, useRef, useState } from 'react'; import { Menu, MenuList, @@ -18,9 +18,20 @@ import { useSystem } from '../../../hooks/useSystem'; import Avatar from '../Avatar'; export type MenuItemType = 'primary' | 'danger' | 'gray' | 'grayBg'; - export type MenuSizeType = 'sm' | 'md' | 'xs' | 'mini'; +export type MenuItemData = { + label?: string; + children: Array<{ + isActive?: boolean; + type?: MenuItemType; + icon?: IconNameType | string; + label: string | React.ReactNode; + description?: string; + onClick?: () => any; + menuItemStyles?: MenuItemProps; + }>; +}; export type Props = { width?: number | string; offset?: [number, number]; @@ -29,18 +40,7 @@ export type Props = { size?: MenuSizeType; placement?: PlacementWithLogical; - menuList: { - label?: string; - children: { - isActive?: boolean; - type?: MenuItemType; - icon?: IconNameType | string; - label: string | React.ReactNode; - description?: string; - onClick?: () => any; - menuItemStyles?: MenuItemProps; - }[]; - }[]; + menuList: MenuItemData[]; }; const typeMapStyle: Record = { diff --git a/packages/web/components/common/MyPopover/index.tsx b/packages/web/components/common/MyPopover/index.tsx index ccc2bfd3b..3d80893a8 100644 --- a/packages/web/components/common/MyPopover/index.tsx +++ b/packages/web/components/common/MyPopover/index.tsx @@ -43,11 +43,11 @@ const MyPopover = ({ initialFocusRef={firstFieldRef} onOpen={() => { onOpen(); - onOpenFunc && onOpenFunc(); + onOpenFunc?.(); }} onClose={() => { onClose(); - onCloseFunc && onCloseFunc(); + onCloseFunc?.(); }} placement={placement} offset={offset} diff --git a/packages/web/i18n/en/account_team.json b/packages/web/i18n/en/account_team.json index 1e36ff36a..0ea991ac1 100644 --- a/packages/web/i18n/en/account_team.json +++ b/packages/web/i18n/en/account_team.json @@ -6,6 +6,7 @@ "accept": "accept", "action": "operate", "assign_permission": "Permission change", + "audit_log": "audit", "change_department_name": "Department Editor", "change_member_name": "Member name change", "change_member_name_self": "Change member name", @@ -32,6 +33,13 @@ "create_invoice": "Issuing invoices", "create_org": "Create organization", "create_sub_org": "Create sub-organization", + "dataset.api_file": "API Import", + "dataset.common_dataset": "Dataset", + "dataset.external_file": "External File", + "dataset.feishu_dataset": "Feishu Spreadsheet", + "dataset.folder_dataset": "Folder", + "dataset.website_dataset": "Website Sync", + "dataset.yuque_dataset": "Yuque Knowledge Base", "delete": "delete", "delete_api_key": "Delete the API key", "delete_app": "Delete the workbench application", @@ -46,6 +54,7 @@ "delete_from_team": "Move out of the team", "delete_group": "Delete a group", "delete_org": "Delete organization", + "department": "department", "edit_info": "Edit information", "edit_member": "Edit user", "edit_member_tip": "Name", @@ -136,16 +145,12 @@ "login": "Log in", "manage_member": "Managing members", "member": "member", - "department": "department", - "update": "update", - "save_and_publish": "save and publish", 
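// --- Editor's sketch (not part of the patch) ---------------------------------
// Example usage of the new MultipleMenu component above (MenuItemData is the
// type it exports). An item that carries its own menuList is rendered as a
// second-level menu via MyPopover; leaf items fire onClick and close the menu.
// All labels, icons, and handlers here are illustrative.
const exampleMenu: MenuItemData[] = [
  {
    label: 'Import',
    children: [
      { icon: 'core/dataset/fileCollection', label: 'Local file', onClick: () => {} },
      {
        label: 'More',
        // nested menuList -> rendered as a hoverable submenu
        menuList: [{ children: [{ label: 'Template import', onClick: () => {} }] }]
      }
    ]
  }
];
// Rendered as: <MultipleMenu Trigger={<Button>Create</Button>} menuList={exampleMenu} />
// -----------------------------------------------------------------------------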
"member_group": "Belonging to member group", "move_app": "App location movement", "move_dataset": "Mobile Knowledge Base", "move_member": "Move member", "move_org": "Move organization", "notification_recieve": "Team notification reception", - "operation_log": "log", "org": "organization", "org_description": "Organization description", "org_name": "Organization name", @@ -169,6 +174,7 @@ "restore_tip_title": "Recovery confirmation", "retain_admin_permissions": "Keep administrator rights", "retrain_collection": "Retrain the set", + "save_and_publish": "save and publish", "search_log": "Search log", "search_member": "Search for members", "search_member_group_name": "Search member/group name", @@ -190,14 +196,8 @@ "type.Tool": "Tool", "type.Tool set": "Toolset", "type.Workflow bot": "Workflow", - "dataset.folder_dataset": "Folder", - "dataset.common_dataset": "Dataset", - "dataset.website_dataset": "Website Sync", - "dataset.external_file": "External File", - "dataset.api_file": "API Import", - "dataset.feishu_dataset": "Feishu Spreadsheet", - "dataset.yuque_dataset": "Yuque Knowledge Base", "unlimited": "Unlimited", + "update": "update", "update_api_key": "Update API key", "update_app_collaborator": "Apply permission changes", "update_app_info": "Application information modification", @@ -213,4 +213,4 @@ "user_team_leave_team": "Leave the team", "user_team_leave_team_failed": "Failure to leave the team", "waiting": "To be accepted" -} \ No newline at end of file +} diff --git a/packages/web/i18n/en/chat.json b/packages/web/i18n/en/chat.json index ff931d947..211800445 100644 --- a/packages/web/i18n/en/chat.json +++ b/packages/web/i18n/en/chat.json @@ -71,13 +71,13 @@ "response_embedding_model_tokens": "Vector Model Tokens", "response_hybrid_weight": "Embedding : Full text = {{emb}} : {{text}}", "response_rerank_tokens": "Rearrange Model Tokens", + "search_results": "Search results", "select": "Select", "select_file": "Upload File", "select_file_img": "Upload file / image", "select_img": "Upload Image", "source_cronJob": "Scheduled execution", "stream_output": "Stream Output", - "to_dataset": "Go to the Knowledge Base", "unsupported_file_type": "Unsupported file types", "upload": "Upload", "variable_invisable_in_share": "Custom variables are not visible in login-free links", diff --git a/packages/web/i18n/en/common.json b/packages/web/i18n/en/common.json index d30efe081..39b350d9b 100644 --- a/packages/web/i18n/en/common.json +++ b/packages/web/i18n/en/common.json @@ -180,7 +180,7 @@ "code_error.user_error.balance_not_enough": "Insufficient Account Balance", "code_error.user_error.bin_visitor_guest": "You Are Currently a Guest, Unauthorized to Operate", "code_error.user_error.un_auth_user": "User Not Found", - "comfirm_import": "comfirm_import", + "comfirm_import": "Confirm import", "comfirm_leave_page": "Confirm to Leave This Page?", "comfirn_create": "Confirm Creation", "commercial_function_tip": "Please Upgrade to the Commercial Version to Use This Feature: https://doc.fastgpt.cn/docs/commercial/intro/", @@ -403,7 +403,6 @@ "core.chat.response.module model": "Model", "core.chat.response.module name": "Model Name", "core.chat.response.module query": "Question/Search Term", - "core.chat.response.module quoteList": "Quote Content", "core.chat.response.module similarity": "Similarity", "core.chat.response.module temperature": "Temperature", "core.chat.response.module time": "Run Time", @@ -424,7 +423,6 @@ "core.dataset.Empty Dataset Tips": "No Dataset Yet, Create One Now!", "core.dataset.Folder 
placeholder": "This is a Directory", "core.dataset.Intro Placeholder": "This Dataset Has No Introduction Yet", - "core.dataset.Manual collection": "Manual Dataset", "core.dataset.My Dataset": "My Dataset", "core.dataset.Query extension intro": "Enabling the question optimization function can improve the accuracy of Dataset searches during continuous conversations. After enabling this function, when performing Dataset searches, the AI will complete the missing information of the question based on the conversation history.", "core.dataset.Quote Length": "Quote Content Length", @@ -434,7 +432,6 @@ "core.dataset.Text collection": "Text Dataset", "core.dataset.apiFile": "API File", "core.dataset.collection.Click top config website": "Click to Configure Website", - "core.dataset.collection.Collection name": "Dataset Name", "core.dataset.collection.Collection raw text": "Dataset Content", "core.dataset.collection.Empty Tip": "The Dataset is Empty", "core.dataset.collection.QA Prompt": "QA Split Prompt", @@ -451,7 +448,6 @@ "core.dataset.collection.metadata.metadata": "Metadata", "core.dataset.collection.metadata.read source": "View Original Content", "core.dataset.collection.metadata.source": "Data Source", - "core.dataset.collection.metadata.source name": "Source Name", "core.dataset.collection.metadata.source size": "Source Size", "core.dataset.collection.status.active": "Ready", "core.dataset.collection.status.error": "Error", @@ -743,7 +739,7 @@ "core.workflow.value": "Value", "core.workflow.variable": "Variable", "create": "Create", - "create_failed": "Creation Failed", + "create_failed": "Create failed", "create_success": "Created Successfully", "create_time": "Creation Time", "cron_job_run_app": "Scheduled Task", @@ -788,7 +784,6 @@ "dataset.dataset_name": "Dataset Name", "dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? 
Data Cannot Be Recovered After Deletion, Please Confirm!", "dataset.test.noResult": "No Search Results", - "dataset_data_import_q_placeholder": "Up to {{maxToken}} words.", "dataset_data_input_a": "Answer", "dataset_data_input_chunk": "Chunk", "dataset_data_input_chunk_content": "Chunk", @@ -802,7 +797,6 @@ "delete_success": "Deleted Successfully", "delete_warning": "Deletion Warning", "embedding_model_not_config": "No index model is detected", - "error.Create failed": "Create failed", "error.code_error": "Verification code error", "error.fileNotFound": "File not found~", "error.inheritPermissionError": "Inherit permission Error", @@ -1208,6 +1202,7 @@ "templateTags.Writing": "Writing", "template_market": "Template Market", "textarea_variable_picker_tip": "Enter \"/\" to select a variable", + "to_dataset": "Go to dataset", "ui.textarea.Magnifying": "Magnifying", "un_used": "Unused", "unauth_token": "The certificate has expired, please log in again", @@ -1306,4 +1301,4 @@ "zoomin_tip_mac": "Zoom Out ⌘ -", "zoomout_tip": "Zoom In ctrl +", "zoomout_tip_mac": "Zoom In ⌘ +" -} \ No newline at end of file +} diff --git a/packages/web/i18n/en/dataset.json b/packages/web/i18n/en/dataset.json index 6a7b1e560..fa29d8b66 100644 --- a/packages/web/i18n/en/dataset.json +++ b/packages/web/i18n/en/dataset.json @@ -8,12 +8,11 @@ "auto_indexes_tips": "Additional index generation is performed through large models to improve semantic richness and improve retrieval accuracy.", "auto_training_queue": "Enhanced index queueing", "backup_collection": "Backup data", - "backup_data_parse": "Backup data is being parsed", - "backup_data_uploading": "Backup data is being uploaded: {{num}}%", "backup_dataset": "Backup import", "backup_dataset_success": "The backup was created successfully", "backup_dataset_tip": "You can reimport the downloaded csv file when exporting the knowledge base.", "backup_mode": "Backup import", + "backup_template_invalid": "The backup file format is incorrect; it should be a CSV file whose first row is q,a,indexes", "chunk_max_tokens": "max_tokens", "chunk_process_params": "Block processing parameters", "chunk_size": "Block size", @@ -28,16 +27,21 @@ "collection.training_type": "Chunk type", "collection_data_count": "Data amount", "collection_metadata_custom_pdf_parse": "PDF enhancement analysis", + "collection_name": "Collection name", "collection_not_support_retraining": "This collection type does not support retuning parameters", "collection_not_support_sync": "This collection does not support synchronization", "collection_sync": "Sync data", "collection_sync_confirm_tip": "Confirm to start synchronizing data? \nThe system will pull the latest data for comparison. If the contents are different, a new collection will be created and the old collection will be deleted. Please confirm!", "collection_tags": "Collection Tags", + "common.dataset.data.Input Error Tip": "[Image dataset] Processing error:", + "common.error.unKnow": "Unknown error", "common_dataset": "General Dataset", "common_dataset_desc": "Building a knowledge base by importing files, web page links, or manual entry", "condition": "condition", "config_sync_schedule": "Configure scheduled synchronization", + "confirm_import_images": "Total {{num}} images | Confirm creation", "confirm_to_rebuild_embedding_tip": "Are you sure you want to switch the index for the Dataset?\nSwitching the index is a significant operation that requires re-indexing all data in your Dataset, which may take a long time. 
Please ensure your account has sufficient remaining points.\n\nAdditionally, you need to update the applications that use this Dataset to avoid conflicts with other indexed model Datasets.", + "core.dataset.Image collection": "Image dataset", "core.dataset.import.Adjust parameters": "Adjust parameters", "custom_data_process_params": "Custom", "custom_data_process_params_desc": "Customize data processing rules", @@ -46,8 +50,10 @@ "data_error_amount": "{{errorAmount}} Group training exception", "data_index_image": "Image index", "data_index_num": "Index {{index}}", + "data_parsing": "Parsing data", "data_process_params": "Params", "data_process_setting": "Processing config", + "data_uploading": "Uploading data: {{num}}%", "dataset.Chunk_Number": "Block number", "dataset.Completed": "Finish", "dataset.Delete_Chunk": "delete", @@ -67,7 +73,9 @@ "dataset.no_tags": "No tags available", "default_params": "default", "default_params_desc": "Use system default parameters and rules", + "download_csv_template": "Click to download the CSV template", "edit_dataset_config": "Edit knowledge base configuration", + "empty_collection": "Empty collection", "enhanced_indexes": "Index enhancement", "error.collectionNotFound": "Collection not found~", "external_file": "External File Library", @@ -90,6 +98,7 @@ "image_auto_parse": "Automatic image indexing", "image_auto_parse_tips": "Call VLM to automatically label the pictures in the document and generate additional search indexes", "image_training_queue": "Queue of image processing", + "images_creating": "Creating", "immediate_sync": "Immediate Synchronization", "import.Auto mode Estimated Price Tips": "The text understanding model needs to be called, which requires more points: {{price}} points/1K tokens", "import.Embedding Estimated Price Tips": "Only use the index model and consume a small amount of AI points: {{price}} points/1K tokens", @@ -104,6 +113,8 @@ "index_size": "Index size", "index_size_tips": "When vectorized, the system will automatically further segment the blocks according to this size.", "input_required_field_to_select_baseurl": "Please enter the required information first", + "insert_images": "Add images", + "insert_images_success": "Images added successfully. They will be displayed once training completes.", "is_open_schedule": "Enable scheduled synchronization", "keep_image": "Keep the picture", "loading": "Loading...", @@ -135,6 +146,7 @@ "process.Image_Index": "Image index generation", "process.Is_Ready": "Ready", "process.Is_Ready_Count": "{{count}} Group is ready", + "process.Parse_Image": "Parsing images", "process.Parsing": "Parsing", "process.Vectorizing": "Index vectorization", "process.Waiting": "Queue", @@ -174,18 +186,28 @@ "tag.searchOrAddTag": "Search or Add Tag", "tag.tags": "Tags", "tag.total_tags": "Total {{total}} tags", + "template_dataset": "Template import", + "template_file_invalid": "The template file format is incorrect; it should be a CSV file whose first row is q,a,indexes", + "template_mode": "Template import", "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "The Dataset has indexes that are being trained or rebuilt", "total_num_files": "Total {{total}} files", "training.Error": "{{count}} Group exception", "training.Normal": "Normal", "training_mode": "Chunk mode", + "training_queue_tip": "Training queue status", "training_ready": "{{count}} Group", + "upload_by_template_format": "Upload by template file", + 
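// --- Editor's sketch (not part of the patch) ---------------------------------
// backup_template_invalid / template_file_invalid above describe the expected
// CSV layout: a header row of q,a,indexes, then one chunk per data row. A
// minimal header check in the spirit of those messages (the real validator is
// not shown in this patch):
export function isValidTemplateHeader(headerRow: string[]) {
  const normalized = headerRow.map((cell) => cell.trim().toLowerCase());
  return normalized[0] === 'q' && normalized[1] === 'a' && normalized[2] === 'indexes';
}
// Example file:
// q,a,indexes
// "What is FastGPT?","A knowledge-base QA platform.","optional custom index"
// -----------------------------------------------------------------------------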
"uploading_progress": "Uploading: {{num}}%", "vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens", + "vector_training_queue": "Vector training queue", "vllm_model": "Image understanding model", + "vlm_model_required_tooltip": "A Vision Language Model is required to create image collections", + "vlm_model_required_warning": "Image datasets require a Vision Language Model (VLM) to be configured. Please add a model that supports image understanding in the model configuration first.", + "waiting_for_training": "Waiting for training", "website_dataset": "Website Sync", "website_dataset_desc": "Build knowledge base by crawling web page data in batches", "website_info": "Website Information", - "yuque_dataset": "Yuque Dataset", - "yuque_dataset_config": "Yuque Dataset Config", - "yuque_dataset_desc": "Can build a dataset using Yuque documents by configuring permissions, without secondary storage" + "yuque_dataset": "Yuque Knowledge Base", + "yuque_dataset_config": "Configure Yuque Knowledge Base", + "yuque_dataset_desc": "Build knowledge base using Yuque documents by configuring document permissions, documents will not be stored twice" } diff --git a/packages/web/i18n/en/file.json b/packages/web/i18n/en/file.json index 2bd7a3061..a95d7ca3a 100644 --- a/packages/web/i18n/en/file.json +++ b/packages/web/i18n/en/file.json @@ -1,9 +1,32 @@ { + "Action": "Please select the image to upload", + "All images import failed": "All pictures failed to import", + "Dataset_ID_not_found": "The dataset ID does not exist", + "Failed_to_get_token": "Failed to obtain the token", + "Image_ID_copied": "Copy ID", + "Image_Preview": "Picture preview", + "Image_dataset_requires_VLM_model_to_be_configured": "The image dataset needs to be configured with the image understanding model (VLM) to be used. 
Please add a model that supports image understanding in the model configuration first.", + "Image_does_not_belong_to_current_team": "The picture does not belong to the current team", + "Image_file_does_not_exist": "The picture does not exist", + "Loading_image": "Loading the picture...", + "Loading_image failed": "Preview loading failed", + "Only_support_uploading_one_image": "Only support uploading one image", + "Please select the image to upload": "Please select the image to upload", + "Please select the image to upload select the image to upload": "", + "Please wait for all files to upload": "Please wait for all files to be uploaded to complete", "bucket_chat": "Conversation Files", "bucket_file": "Dataset Documents", "click_to_view_raw_source": "Click to View Original Source", + "common.dataset_data_input_image_support_format": "Support .jpg, .jpeg, .png, .gif, .webp formats", + "delete_image": "Delete pictures", "file_name": "Filename", "file_size": "Filesize", + "image": "picture", + "image_collection": "Picture collection", + "image_description": "Image description", + "image_description_tip": "Please enter the description of the picture", + "please_upload_image_first": "Please upload the picture first", + "reached_max_file_count": "Maximum file count reached", "release_the_mouse_to_upload_the_file": "Release Mouse to Upload File", "select_and_drag_file_tip": "Click or Drag Files Here to Upload", "select_file_amount_limit": "You can select up to {{max}} files", @@ -12,7 +35,11 @@ "support_file_type": "Supports {{fileType}} file types", "support_max_count": "Supports up to {{maxCount}} files", "support_max_size": "Maximum file size is {{maxSize}}", + "template_csv_file_select_tip": "Only support {{fileType}} files that are strictly in accordance with template format", + "template_strict_highlight": "Strictly follow the template", + "total_files": "Total {{selectFiles.length}} files", + "upload_error_description": "Only multiple files or a single folder can be uploaded at a time", "upload_failed": "Upload Failed", - "reached_max_file_count": "Maximum file count reached", - "upload_error_description": "Only multiple files or a single folder can be uploaded at a time" -} \ No newline at end of file + "upload_file_error": "Please upload pictures", + "uploading": "Uploading..." 
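// --- Editor's sketch (not part of the patch) ---------------------------------
// How the locale entries above are consumed: next-i18next interpolates the
// {{...}} placeholders from the params object passed to t(), using the
// namespace:key convention seen elsewhere in this diff. Component and values
// are illustrative.
import { useTranslation } from 'next-i18next';

export function UploadingLabel({ num }: { num: number }) {
  const { t } = useTranslation();
  // en -> "Uploading data: 42%", zh-CN -> "数据上传中: 42%"
  return <span>{t('dataset:data_uploading', { num })}</span>;
}
// -----------------------------------------------------------------------------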
+} diff --git a/packages/web/i18n/zh-CN/account_team.json b/packages/web/i18n/zh-CN/account_team.json index 193c4c8d8..9c5ab3359 100644 --- a/packages/web/i18n/zh-CN/account_team.json +++ b/packages/web/i18n/zh-CN/account_team.json @@ -6,6 +6,7 @@ "accept": "接受", "action": "操作", "assign_permission": "权限变更", + "audit_log": "审计", "change_department_name": "部门编辑", "change_member_name": "成员改名", "change_member_name_self": "变更成员名", @@ -33,6 +34,13 @@ "create_invoice": "开发票", "create_org": "创建部门", "create_sub_org": "创建子部门", + "dataset.api_file": "API导入", + "dataset.common_dataset": "知识库", + "dataset.external_file": "外部文件", + "dataset.feishu_dataset": "飞书多维表格", + "dataset.folder_dataset": "文件夹", + "dataset.website_dataset": "网站同步", + "dataset.yuque_dataset": "语雀知识库", "delete": "删除", "delete_api_key": "删除api密钥", "delete_app": "删除工作台应用", @@ -47,6 +55,7 @@ "delete_from_team": "移出团队", "delete_group": "删除群组", "delete_org": "删除部门", + "department": "部门", "edit_info": "编辑信息", "edit_member": "编辑用户", "edit_member_tip": "成员名", @@ -138,16 +147,12 @@ "login": "登录", "manage_member": "管理成员", "member": "成员", - "department": "部门", - "update": "更新", - "save_and_publish": "保存并发布", "member_group": "所属群组", "move_app": "应用位置移动", "move_dataset": "移动知识库", "move_member": "移动成员", "move_org": "移动部门", "notification_recieve": "团队通知接收", - "operation_log": "日志", "org": "部门", "org_description": "介绍", "org_name": "部门名称", @@ -171,6 +176,7 @@ "restore_tip_title": "恢复确认", "retain_admin_permissions": "保留管理员权限", "retrain_collection": "重新训练集合", + "save_and_publish": "保存并发布", "search_log": "搜索日志", "search_member": "搜索成员", "search_member_group_name": "搜索成员/群组名称", @@ -192,14 +198,8 @@ "type.Tool": "工具", "type.Tool set": "工具集", "type.Workflow bot": "工作流", - "dataset.folder_dataset": "文件夹", - "dataset.common_dataset": "知识库", - "dataset.website_dataset": "网站同步", - "dataset.external_file": "外部文件", - "dataset.api_file": "API导入", - "dataset.feishu_dataset": "飞书多维表格", - "dataset.yuque_dataset": "语雀知识库", "unlimited": "无限制", + "update": "更新", "update_api_key": "更新api密钥", "update_app_collaborator": "应用权限更改", "update_app_info": "应用信息修改", @@ -215,4 +215,4 @@ "user_team_leave_team": "离开团队", "user_team_leave_team_failed": "离开团队失败", "waiting": "待接受" -} \ No newline at end of file +} diff --git a/packages/web/i18n/zh-CN/chat.json b/packages/web/i18n/zh-CN/chat.json index 89d37dcd6..87307af07 100644 --- a/packages/web/i18n/zh-CN/chat.json +++ b/packages/web/i18n/zh-CN/chat.json @@ -71,13 +71,13 @@ "response_embedding_model_tokens": "向量模型 Tokens", "response_hybrid_weight": "语义检索 : 全文检索 = {{emb}} : {{text}}", "response_rerank_tokens": "重排模型 Tokens", + "search_results": "搜索结果", "select": "选择", "select_file": "上传文件", "select_file_img": "上传文件/图片", "select_img": "上传图片", "source_cronJob": "定时执行", "stream_output": "流输出", - "to_dataset": "前往知识库", "unsupported_file_type": "不支持的文件类型", "upload": "上传", "variable_invisable_in_share": "自定义变量在免登录链接中不可见", diff --git a/packages/web/i18n/zh-CN/common.json b/packages/web/i18n/zh-CN/common.json index c583992d4..7cfc615c2 100644 --- a/packages/web/i18n/zh-CN/common.json +++ b/packages/web/i18n/zh-CN/common.json @@ -403,7 +403,6 @@ "core.chat.response.module model": "模型", "core.chat.response.module name": "模型名", "core.chat.response.module query": "问题/检索词", - "core.chat.response.module quoteList": "引用内容", "core.chat.response.module similarity": "相似度", "core.chat.response.module temperature": "温度", "core.chat.response.module time": "运行时长", @@ -424,7 +423,6 @@ "core.dataset.Empty Dataset Tips": "还没有知识库,快去创建一个吧!", 
"core.dataset.Folder placeholder": "这是一个目录", "core.dataset.Intro Placeholder": "这个知识库还没有介绍~", - "core.dataset.Manual collection": "手动数据集", "core.dataset.My Dataset": "我的知识库", "core.dataset.Query extension intro": "开启问题优化功能,可以提高提高连续对话时,知识库搜索的精度。开启该功能后,在进行知识库搜索时,会根据对话记录,利用 AI 补全问题缺失的信息。", "core.dataset.Quote Length": "引用内容长度", @@ -434,7 +432,6 @@ "core.dataset.Text collection": "文本数据集", "core.dataset.apiFile": "API 文件", "core.dataset.collection.Click top config website": "点击配置网站", - "core.dataset.collection.Collection name": "数据集名称", "core.dataset.collection.Collection raw text": "数据集内容", "core.dataset.collection.Empty Tip": "数据集空空如也", "core.dataset.collection.QA Prompt": "QA 拆分引导词", @@ -451,7 +448,6 @@ "core.dataset.collection.metadata.metadata": "元数据", "core.dataset.collection.metadata.read source": "查看原始内容", "core.dataset.collection.metadata.source": "数据来源", - "core.dataset.collection.metadata.source name": "来源名", "core.dataset.collection.metadata.source size": "来源大小", "core.dataset.collection.status.active": "已就绪", "core.dataset.collection.status.error": "训练异常", @@ -743,7 +739,7 @@ "core.workflow.value": "值", "core.workflow.variable": "变量", "create": "去创建", - "create_failed": "创建异常", + "create_failed": "创建失败", "create_success": "创建成功", "create_time": "创建时间", "cron_job_run_app": "定时任务", @@ -788,7 +784,6 @@ "dataset.dataset_name": "知识库名称", "dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!", "dataset.test.noResult": "搜索结果为空", - "dataset_data_import_q_placeholder": "最多 {{maxToken}} 字。", "dataset_data_input_a": "答案", "dataset_data_input_chunk": "常规模式", "dataset_data_input_chunk_content": "内容", @@ -802,7 +797,6 @@ "delete_success": "删除成功", "delete_warning": "删除警告", "embedding_model_not_config": "检测到没有可用的索引模型", - "error.Create failed": "创建失败", "error.code_error": "验证码错误", "error.fileNotFound": "文件找不到了~", "error.inheritPermissionError": "权限继承错误", @@ -1208,6 +1202,7 @@ "templateTags.Writing": "文本创作", "template_market": "模板市场", "textarea_variable_picker_tip": "输入\"/\"可选择变量", + "to_dataset": "前往知识库", "ui.textarea.Magnifying": "放大", "un_used": "未使用", "unauth_token": "凭证已过期,请重新登录", @@ -1306,4 +1301,4 @@ "zoomin_tip_mac": "缩小 ⌘ -", "zoomout_tip": "放大 ctrl +", "zoomout_tip_mac": "放大 ⌘ +" -} \ No newline at end of file +} diff --git a/packages/web/i18n/zh-CN/dataset.json b/packages/web/i18n/zh-CN/dataset.json index 455343d17..eb9a41ea3 100644 --- a/packages/web/i18n/zh-CN/dataset.json +++ b/packages/web/i18n/zh-CN/dataset.json @@ -8,12 +8,11 @@ "auto_indexes_tips": "通过大模型进行额外索引生成,提高语义丰富度,提高检索的精度。", "auto_training_queue": "增强索引排队", "backup_collection": "备份数据", - "backup_data_parse": "备份数据解析中", - "backup_data_uploading": "备份数据上传中: {{num}}%", "backup_dataset": "备份导入", "backup_dataset_success": "备份创建成功", "backup_dataset_tip": "可以将导出知识库时,下载的 csv 文件重新导入。", "backup_mode": "备份导入", + "backup_template_invalid": "备份文件格式不正确,应该是首列为 q,a,indexes 的 csv 文件", "chunk_max_tokens": "分块上限", "chunk_process_params": "分块处理参数", "chunk_size": "分块大小", @@ -28,16 +27,21 @@ "collection.training_type": "处理模式", "collection_data_count": "数据量", "collection_metadata_custom_pdf_parse": "PDF增强解析", + "collection_name": "数据集名称", "collection_not_support_retraining": "该集合类型不支持重新调整参数", "collection_not_support_sync": "该集合不支持同步", "collection_sync": "立即同步", "collection_sync_confirm_tip": "确认开始同步数据?系统将会拉取最新数据进行比较,如果内容不相同,则会创建一个新的集合并删除旧的集合,请确认!", "collection_tags": "集合标签", + "common.dataset.data.Input Error Tip": "[图片数据集] 处理过程错误:", + "common.error.unKnow": "未知错误", "common_dataset": "通用知识库", "common_dataset_desc": 
"通过导入文件、网页链接或手动录入形式构建知识库", "condition": "条件", "config_sync_schedule": "配置定时同步", + "confirm_import_images": "共 {{num}} 张图片 | 确认创建", "confirm_to_rebuild_embedding_tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作,需要对您知识库内所有数据进行重新索引,时间可能较长,请确保账号内剩余积分充足。\n\n此外,你还需要注意修改选择该知识库的应用,避免它们与其他索引模型知识库混用。", + "core.dataset.Image collection": "图片数据集", "core.dataset.import.Adjust parameters": "调整参数", "custom_data_process_params": "自定义", "custom_data_process_params_desc": "自定义设置数据处理规则", @@ -46,8 +50,10 @@ "data_error_amount": "{{errorAmount}} 组训练异常", "data_index_image": "图片索引", "data_index_num": "索引 {{index}}", + "data_parsing": "数据解析中", "data_process_params": "处理参数", "data_process_setting": "数据处理配置", + "data_uploading": "数据上传中: {{num}}%", "dataset.Chunk_Number": "分块号", "dataset.Completed": "完成", "dataset.Delete_Chunk": "删除", @@ -67,7 +73,9 @@ "dataset.no_tags": "暂无标签", "default_params": "默认", "default_params_desc": "使用系统默认的参数和规则", + "download_csv_template": "点击下载 CSV 模板", "edit_dataset_config": "编辑知识库配置", + "empty_collection": "空白数据集", "enhanced_indexes": "索引增强", "error.collectionNotFound": "集合找不到了~", "external_file": "外部文件库", @@ -90,6 +98,7 @@ "image_auto_parse": "图片自动索引", "image_auto_parse_tips": "调用 VLM 自动标注文档里的图片,并生成额外的检索索引", "image_training_queue": "图片处理排队", + "images_creating": "正在创建", "immediate_sync": "立即同步", "import.Auto mode Estimated Price Tips": "需调用文本理解模型,需要消耗较多AI 积分:{{price}} 积分/1K tokens", "import.Embedding Estimated Price Tips": "仅使用索引模型,消耗少量 AI 积分:{{price}} 积分/1K tokens", @@ -104,6 +113,8 @@ "index_size": "索引大小", "index_size_tips": "向量化时内容的长度,系统会自动按该大小对分块进行进一步的分割。", "input_required_field_to_select_baseurl": "请先输入必填信息", + "insert_images": "新增图片", + "insert_images_success": "新增图片成功,需等待训练完成才会展示", "is_open_schedule": "启用定时同步", "keep_image": "保留图片", "loading": "加载中...", @@ -135,6 +146,7 @@ "process.Image_Index": "图片索引生成", "process.Is_Ready": "已就绪", "process.Is_Ready_Count": "{{count}} 组已就绪", + "process.Parse_Image": "图片解析中", "process.Parsing": "内容解析中", "process.Vectorizing": "索引向量化", "process.Waiting": "排队中", @@ -173,14 +185,21 @@ "tag.searchOrAddTag": "搜索或添加标签", "tag.tags": "标签", "tag.total_tags": "共{{total}}个标签", + "template_dataset": "模版导入", + "template_file_invalid": "模板文件格式不正确,应该是首列为 q,a,indexes 的 csv 文件", + "template_mode": "模板导入", "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引", "total_num_files": "共 {{total}} 个文件", "training.Error": "{{count}} 组异常", + "training.Image mode": "图片处理", "training.Normal": "正常", "training_mode": "处理方式", "training_ready": "{{count}} 组", + "upload_by_template_format": "按模版文件上传", + "uploading_progress": "上传中: {{num}}%", "vector_model_max_tokens_tip": "每个分块数据,最大长度为 3000 tokens", "vllm_model": "图片理解模型", + "vlm_model_required_warning": "需要图片理解模型", "website_dataset": "Web 站点同步", "website_dataset_desc": "通过爬虫,批量爬取网页数据构建知识库", "website_info": "网站信息", diff --git a/packages/web/i18n/zh-CN/file.json b/packages/web/i18n/zh-CN/file.json index 839fe4c82..2e8b76ad1 100644 --- a/packages/web/i18n/zh-CN/file.json +++ b/packages/web/i18n/zh-CN/file.json @@ -1,9 +1,33 @@ { + "Action": "请选择要上传的图片", + "All images import failed": "所有图片导入失败", + "Dataset_ID_not_found": "数据集ID不存在", + "Failed_to_get_token": "获取令牌失败", + "Image_ID_copied": "已复制ID", + "Image_Preview": "图片预览", + "Image_dataset_requires_VLM_model_to_be_configured": "图片数据集需要配置图片理解模型(VLM)才能使用,请先在模型配置中添加支持图片理解的模型", + "Image_does_not_belong_to_current_team": "图片不属于当前团队", + "Image_file_does_not_exist": "图片不存在", + "Loading_image": "加载图片中...", + "Loading_image failed": "预览加载失败", + 
"Only_support_uploading_one_image": "仅支持上传一张图片", + "Please select the image to upload": "请选择要上传的图片", + "Please wait for all files to upload": "请等待所有文件上传完成", "bucket_chat": "对话文件", "bucket_file": "知识库文件", "click_to_view_raw_source": "点击查看来源", + "common.Some images failed to process": "部分图片处理失败", + "common.dataset_data_input_image_support_format": "支持 .jpg, .jpeg, .png, .gif, .webp 格式", + "count.core.dataset.collection.Create Success": "成功导入 {{count}} 张图片", + "delete_image": "删除图片", "file_name": "文件名", "file_size": "文件大小", + "image": "图片", + "image_collection": "图片集合", + "image_description": "图片描述", + "image_description_tip": "请输入图片的描述内容", + "please_upload_image_first": "请先上传图片", + "reached_max_file_count": "已达到最大文件数量", "release_the_mouse_to_upload_the_file": "松开鼠标上传文件", "select_and_drag_file_tip": "点击或拖动文件到此处上传", "select_file_amount_limit": "最多选择 {{max}} 个文件", @@ -12,7 +36,11 @@ "support_file_type": "支持 {{fileType}} 类型文件", "support_max_count": "最多支持 {{maxCount}} 个文件", "support_max_size": "单个文件最大 {{maxSize}}", + "template_csv_file_select_tip": "仅支持严格按照模板填写的 {{fileType}} 文件", + "template_strict_highlight": "严格按照模版", + "total_files": "共{{selectFiles.length}}个文件", + "upload_error_description": "单次只支持上传多个文件或者一个文件夹", "upload_failed": "上传异常", - "reached_max_file_count": "已达到最大文件数量", - "upload_error_description": "单次只支持上传多个文件或者一个文件夹" -} \ No newline at end of file + "upload_file_error": "请上传图片", + "uploading": "正在上传..." +} diff --git a/packages/web/i18n/zh-Hant/account_team.json b/packages/web/i18n/zh-Hant/account_team.json index e601bb7f4..a1c585b78 100644 --- a/packages/web/i18n/zh-Hant/account_team.json +++ b/packages/web/i18n/zh-Hant/account_team.json @@ -6,6 +6,7 @@ "accept": "接受", "action": "操作", "assign_permission": "權限變更", + "audit_log": "審計", "change_department_name": "部門編輯", "change_member_name": "成員改名", "change_member_name_self": "變更成員名", @@ -32,6 +33,13 @@ "create_invoice": "開發票", "create_org": "建立部門", "create_sub_org": "建立子部門", + "dataset.api_file": "API 匯入", + "dataset.common_dataset": "知識庫", + "dataset.external_file": "外部文件", + "dataset.feishu_dataset": "飛書多維表格", + "dataset.folder_dataset": "資料夾", + "dataset.website_dataset": "網站同步", + "dataset.yuque_dataset": "語雀知識庫", "delete": "刪除", "delete_api_key": "刪除api密鑰", "delete_app": "刪除工作台應用", @@ -46,6 +54,7 @@ "delete_from_team": "移出團隊", "delete_group": "刪除群組", "delete_org": "刪除部門", + "department": "部門", "edit_info": "編輯訊息", "edit_member": "編輯使用者", "edit_member_tip": "成員名", @@ -136,16 +145,12 @@ "login": "登入", "manage_member": "管理成員", "member": "成員", - "department": "部門", - "update": "更新", - "save_and_publish": "儲存並發布", "member_group": "所屬成員組", "move_app": "應用位置移動", "move_dataset": "移動知識庫", "move_member": "移動成員", "move_org": "行動部門", "notification_recieve": "團隊通知接收", - "operation_log": "紀錄", "org": "組織", "org_description": "介紹", "org_name": "部門名稱", @@ -169,6 +174,7 @@ "restore_tip_title": "恢復確認", "retain_admin_permissions": "保留管理員權限", "retrain_collection": "重新訓練集合", + "save_and_publish": "儲存並發布", "search_log": "搜索日誌", "search_member": "搜索成員", "search_member_group_name": "搜尋成員/群組名稱", @@ -190,14 +196,8 @@ "type.Tool": "工具", "type.Tool set": "工具集", "type.Workflow bot": "工作流程", - "dataset.folder_dataset": "資料夾", - "dataset.common_dataset": "知識庫", - "dataset.website_dataset": "網站同步", - "dataset.external_file": "外部文件", - "dataset.api_file": "API 匯入", - "dataset.feishu_dataset": "飛書多維表格", - "dataset.yuque_dataset": "語雀知識庫", "unlimited": "無限制", + "update": "更新", "update_api_key": "更新api密鑰", "update_app_collaborator": "應用權限更改", "update_app_info": 
"應用信息修改", @@ -213,4 +213,4 @@ "user_team_leave_team": "離開團隊", "user_team_leave_team_failed": "離開團隊失敗", "waiting": "待接受" -} \ No newline at end of file +} diff --git a/packages/web/i18n/zh-Hant/chat.json b/packages/web/i18n/zh-Hant/chat.json index 53164268f..34ff1f7b3 100644 --- a/packages/web/i18n/zh-Hant/chat.json +++ b/packages/web/i18n/zh-Hant/chat.json @@ -71,13 +71,13 @@ "response_embedding_model_tokens": "向量模型 Tokens", "response_hybrid_weight": "語義檢索 : 全文檢索 = {{emb}} : {{text}}", "response_rerank_tokens": "重排模型 Tokens", + "search_results": "搜索結果", "select": "選取", "select_file": "上傳檔案", "select_file_img": "上傳檔案 / 圖片", "select_img": "上傳圖片", "source_cronJob": "定時執行", "stream_output": "串流輸出", - "to_dataset": "前往知識庫", "unsupported_file_type": "不支援的檔案類型", "upload": "上傳", "variable_invisable_in_share": "自定義變數在免登入連結中不可見", diff --git a/packages/web/i18n/zh-Hant/common.json b/packages/web/i18n/zh-Hant/common.json index 2fed428b3..10d4690c0 100644 --- a/packages/web/i18n/zh-Hant/common.json +++ b/packages/web/i18n/zh-Hant/common.json @@ -403,7 +403,6 @@ "core.chat.response.module model": "模型", "core.chat.response.module name": "模型名稱", "core.chat.response.module query": "問題/搜尋詞", - "core.chat.response.module quoteList": "引用內容", "core.chat.response.module similarity": "相似度", "core.chat.response.module temperature": "溫度", "core.chat.response.module time": "執行時長", @@ -424,7 +423,6 @@ "core.dataset.Empty Dataset Tips": "還沒有知識庫,快來建立一個吧!", "core.dataset.Folder placeholder": "這是一個目錄", "core.dataset.Intro Placeholder": "這個知識庫還沒有介紹", - "core.dataset.Manual collection": "手動資料集", "core.dataset.My Dataset": "我的知識庫", "core.dataset.Query extension intro": "開啟問題最佳化功能,可以提高連續對話時知識庫搜尋的準確度。開啟此功能後,在進行知識庫搜尋時,系統會根據對話記錄,利用 AI 補充問題中缺少的資訊。", "core.dataset.Quote Length": "引用內容長度", @@ -434,7 +432,6 @@ "core.dataset.Text collection": "文字資料集", "core.dataset.apiFile": "API 檔案", "core.dataset.collection.Click top config website": "點選設定網站", - "core.dataset.collection.Collection name": "資料集名稱", "core.dataset.collection.Collection raw text": "資料集內容", "core.dataset.collection.Empty Tip": "資料集是空的", "core.dataset.collection.QA Prompt": "問答拆分提示詞", @@ -451,7 +448,6 @@ "core.dataset.collection.metadata.metadata": "中繼資料", "core.dataset.collection.metadata.read source": "檢視原始內容", "core.dataset.collection.metadata.source": "資料來源", - "core.dataset.collection.metadata.source name": "來源名稱", "core.dataset.collection.metadata.source size": "來源大小", "core.dataset.collection.status.active": "已就緒", "core.dataset.collection.status.error": "訓練異常", @@ -555,7 +551,7 @@ "core.dataset.training.Agent queue": "問答訓練排隊中", "core.dataset.training.Auto mode": "補充索引", "core.dataset.training.Auto mode Tip": "透過子索引以及呼叫模型產生相關問題與摘要,來增加資料區塊的語意豐富度,更有利於檢索。需要消耗更多的儲存空間並增加 AI 呼叫次數。", - "core.dataset.training.Chunk mode": "分塊存儲", + "core.dataset.training.Chunk mode": "分塊儲存", "core.dataset.training.Full": "預計 20 分鐘以上", "core.dataset.training.Leisure": "閒置", "core.dataset.training.QA mode": "問答對提取", @@ -788,7 +784,6 @@ "dataset.dataset_name": "知識庫名稱", "dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!", "dataset.test.noResult": "搜尋結果為空", - "dataset_data_import_q_placeholder": "最多 {{maxToken}} 字。", "dataset_data_input_a": "答案", "dataset_data_input_chunk": "常規模式", "dataset_data_input_chunk_content": "內容", @@ -802,7 +797,6 @@ "delete_success": "刪除成功", "delete_warning": "刪除警告", "embedding_model_not_config": "偵測到沒有可用的索引模型", - "error.Create failed": "建立失敗", "error.code_error": "驗證碼錯誤", "error.fileNotFound": "找不到檔案", "error.inheritPermissionError": "繼承權限錯誤", @@ 
-1208,6 +1202,7 @@ "templateTags.Writing": "文字創作", "template_market": "模板市場", "textarea_variable_picker_tip": "輸入「/」以選擇變數", + "to_dataset": "前往知識庫", "ui.textarea.Magnifying": "放大", "un_used": "未使用", "unauth_token": "憑證已過期,請重新登入", @@ -1306,4 +1301,4 @@ "zoomin_tip_mac": "縮小 ⌘ -", "zoomout_tip": "放大 ctrl +", "zoomout_tip_mac": "放大 ⌘ +" -} \ No newline at end of file +} diff --git a/packages/web/i18n/zh-Hant/dataset.json b/packages/web/i18n/zh-Hant/dataset.json index 463d81a6e..d5bc64a43 100644 --- a/packages/web/i18n/zh-Hant/dataset.json +++ b/packages/web/i18n/zh-Hant/dataset.json @@ -8,11 +8,11 @@ "auto_indexes_tips": "透過大模型進行額外索引生成,提高語義豐富度,提高檢索的精度。", "auto_training_queue": "增強索引排隊", "backup_collection": "備份數據", - "backup_data_uploading": "備份數據上傳中: {{num}}%", "backup_dataset": "備份導入", "backup_dataset_success": "備份創建成功", "backup_dataset_tip": "可以將導出知識庫時,下載的 csv 文件重新導入。", "backup_mode": "備份導入", + "backup_template_invalid": "備份文件格式不正確,應該是首列為 q,a,indexes 的 csv 文件", "chunk_max_tokens": "分塊上限", "chunk_process_params": "分塊處理參數", "chunk_size": "分塊大小", @@ -26,16 +26,21 @@ "collection.training_type": "處理模式", "collection_data_count": "資料量", "collection_metadata_custom_pdf_parse": "PDF 增強解析", + "collection_name": "數據集名稱", "collection_not_support_retraining": "此集合類型不支援重新調整參數", "collection_not_support_sync": "該集合不支援同步", "collection_sync": "立即同步", "collection_sync_confirm_tip": "確認開始同步資料?\n系統將會拉取最新資料進行比較,如果內容不相同,則會建立一個新的集合並刪除舊的集合,請確認!", "collection_tags": "集合標籤", + "common.dataset.data.Input Error Tip": "[圖片數據集] 處理過程錯誤:", + "common.error.unKnow": "未知錯誤", "common_dataset": "通用資料集", "common_dataset_desc": "通過導入文件、網頁鏈接或手動錄入形式構建知識庫", "condition": "條件", "config_sync_schedule": "設定定時同步", + "confirm_import_images": "共 {{num}} 張圖片 | 確認創建", "confirm_to_rebuild_embedding_tip": "確定要為資料集切換索引嗎?\n切換索引是一個重要的操作,需要對您資料集內所有資料重新建立索引,可能需要較長時間,請確保帳號內剩餘點數充足。\n\n此外,您還需要注意修改使用此資料集的應用程式,避免與其他索引模型資料集混用。", + "core.dataset.Image collection": "圖片數據集", "core.dataset.import.Adjust parameters": "調整參數", "custom_data_process_params": "自訂", "custom_data_process_params_desc": "自訂資料處理規則", @@ -44,8 +49,10 @@ "data_error_amount": "{{errorAmount}} 組訓練異常", "data_index_image": "圖片索引", "data_index_num": "索引 {{index}}", + "data_parsing": "數據解析中", "data_process_params": "處理參數", "data_process_setting": "資料處理設定", + "data_uploading": "數據上傳中: {{num}}%", "dataset.Chunk_Number": "分塊號", "dataset.Completed": "完成", "dataset.Delete_Chunk": "刪除", @@ -65,7 +72,9 @@ "dataset.no_tags": "尚無標籤", "default_params": "預設", "default_params_desc": "使用系統預設的參數和規則", + "download_csv_template": "點擊下載 CSV 模板", "edit_dataset_config": "編輯知識庫設定", + "empty_collection": "空白數據集", "enhanced_indexes": "索引增強", "error.collectionNotFound": "找不到集合", "external_file": "外部檔案庫", @@ -88,6 +97,7 @@ "image_auto_parse": "圖片自動索引", "image_auto_parse_tips": "呼叫 VLM 自動標註文件裡的圖片,並生成額外的檢索索引", "image_training_queue": "圖片處理排隊", + "images_creating": "正在創建", "immediate_sync": "立即同步", "import.Auto mode Estimated Price Tips": "需呼叫文字理解模型,將消耗較多 AI 點數:{{price}} 點數 / 1K tokens", "import.Embedding Estimated Price Tips": "僅使用索引模型,消耗少量 AI 點數:{{price}} 點數 / 1K tokens", @@ -102,6 +112,8 @@ "index_size": "索引大小", "index_size_tips": "向量化時內容的長度,系統會自動按該大小對分塊進行進一步的分割。", "input_required_field_to_select_baseurl": "請先輸入必填信息", + "insert_images": "新增圖片", + "insert_images_success": "新增圖片成功,需等待訓練完成才會展示", "is_open_schedule": "啟用定時同步", "keep_image": "保留圖片", "loading": "加載中...", @@ -133,6 +145,7 @@ "process.Image_Index": "圖片索引生成", "process.Is_Ready": "已就緒", "process.Is_Ready_Count": "{{count}} 組已就緒", + "process.Parse_Image": 
"圖片解析中", "process.Parsing": "內容解析中", "process.Vectorizing": "索引向量化", "process.Waiting": "排隊中", @@ -171,14 +184,20 @@ "tag.searchOrAddTag": "搜尋或新增標籤", "tag.tags": "標籤", "tag.total_tags": "共 {{total}} 個標籤", + "template_dataset": "模版導入", + "template_file_invalid": "模板文件格式不正確,應該是首列為 q,a,indexes 的 csv 文件", + "template_mode": "模板導入", "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "資料集有索引正在訓練或重建中", "total_num_files": "共 {{total}} 個文件", "training.Error": "{{count}} 組異常", + "training.Image mode": "圖片處理", "training.Normal": "正常", "training_mode": "分段模式", "training_ready": "{{count}} 組", + "upload_by_template_format": "按模版文件上傳", "vector_model_max_tokens_tip": "每個分塊資料,最大長度為 3000 tokens", "vllm_model": "圖片理解模型", + "vlm_model_required_warning": "需要圖片理解模型", "website_dataset": "網站同步", "website_dataset_desc": "通過爬蟲,批量爬取網頁數據構建知識庫", "website_info": "網站資訊", diff --git a/packages/web/i18n/zh-Hant/file.json b/packages/web/i18n/zh-Hant/file.json index bb297ce9b..eb7223cbd 100644 --- a/packages/web/i18n/zh-Hant/file.json +++ b/packages/web/i18n/zh-Hant/file.json @@ -1,9 +1,31 @@ { + "Action": "請選擇要上傳的圖片", + "All images import failed": "所有圖片導入失敗", + "Dataset_ID_not_found": "數據集ID不存在", + "Failed_to_get_token": "獲取令牌失敗", + "Image_ID_copied": "已復制ID", + "Image_Preview": "圖片預覽", + "Image_dataset_requires_VLM_model_to_be_configured": "圖片數據集需要配置圖片理解模型(VLM)才能使用,請先在模型配置中添加支持圖片理解的模型", + "Image_does_not_belong_to_current_team": "圖片不屬於當前團隊", + "Image_file_does_not_exist": "圖片不存在", + "Loading_image": "加載圖片中...", + "Loading_image_failed": "預覽加載失敗", + "Only_support_uploading_one_image": "僅支持上傳一張圖片", + "Please select the image to upload": "請選擇要上傳的圖片", + "Please select the image to upload select the image to upload": "", + "Please wait for all files to upload": "請等待所有文件上傳完成", "bucket_chat": "對話檔案", "bucket_file": "知識庫檔案", "click_to_view_raw_source": "點選檢視原始來源", + "dataset_data_input_image_support_format": "支持 .jpg, .jpeg, .png, .gif, .webp 格式", + "delete_image": "刪除圖片", "file_name": "檔案名稱", "file_size": "檔案大小", + "image": "圖片", + "image_collection": "圖片集合", + "image_description_tip": "請輸入圖片的描述內容", + "please_upload_image_first": "請先上傳圖片", + "reached_max_file_count": "已達檔案數量上限", "release_the_mouse_to_upload_the_file": "放開滑鼠以上傳檔案", "select_and_drag_file_tip": "點選或拖曳檔案至此處上傳", "select_file_amount_limit": "最多可選擇 {{max}} 個檔案", @@ -12,7 +34,11 @@ "support_file_type": "支援 {{fileType}} 格式的檔案", "support_max_count": "最多可支援 {{maxCount}} 個檔案", "support_max_size": "單一檔案大小上限為 {{maxSize}}", + "template_csv_file_select_tip": "僅支持嚴格按照模板格式的 {{fileType}} 文件", + "template_strict_highlight": "嚴格按照模版", + "total_files": "共{{selectFiles.length}}個文件", + "upload_error_description": "單次僅支援上傳多個檔案或一個資料夾", "upload_failed": "上傳失敗", - "reached_max_file_count": "已達檔案數量上限", - "upload_error_description": "單次僅支援上傳多個檔案或一個資料夾" -} \ No newline at end of file + "upload_file_error": "請上傳圖片", + "uploading": "正在上傳..." 
+} diff --git a/projects/app/src/components/MyImage/index.tsx b/projects/app/src/components/MyImage/index.tsx index da95109c0..9c9f58d47 100644 --- a/projects/app/src/components/MyImage/index.tsx +++ b/projects/app/src/components/MyImage/index.tsx @@ -3,38 +3,28 @@ import { Skeleton, type ImageProps } from '@chakra-ui/react'; import CustomImage from '@fastgpt/web/components/common/Image/MyImage'; export const MyImage = (props: ImageProps) => { - const [isLoading, setIsLoading] = useState(true); const [succeed, setSucceed] = useState(false); + return ( - - { - setIsLoading(false); - setSucceed(true); - }} - onError={() => setIsLoading(false)} - onClick={() => { - if (!succeed) return; - window.open(props.src, '_blank'); - }} - {...props} - /> - + { + setSucceed(true); + }} + onClick={() => { + if (!succeed) return; + window.open(props.src, '_blank'); + }} + {...props} + /> ); }; diff --git a/projects/app/src/components/common/NextHead/index.tsx b/projects/app/src/components/common/NextHead/index.tsx index d6210cff4..a5cbf11f9 100644 --- a/projects/app/src/components/common/NextHead/index.tsx +++ b/projects/app/src/components/common/NextHead/index.tsx @@ -18,7 +18,7 @@ const NextHead = ({ title, icon, desc }: { title?: string; icon?: string; desc?: name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,minimum-scale=1.0,user-scalable=no, viewport-fit=cover" /> - + {desc && } {icon && } diff --git a/projects/app/src/components/core/app/InputGuideConfig.tsx b/projects/app/src/components/core/app/InputGuideConfig.tsx index 0af949c53..f773d7225 100644 --- a/projects/app/src/components/core/app/InputGuideConfig.tsx +++ b/projects/app/src/components/core/app/InputGuideConfig.tsx @@ -240,7 +240,7 @@ const LexiconConfigModal = ({ appId, onClose }: { appId: string; onClose: () => onSuccess() { setNewData(undefined); }, - errorToast: t('common:error.Create failed') + errorToast: t('common:create_failed') } ); diff --git a/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/QuoteList.tsx b/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/QuoteList.tsx index cf1cf33c0..4026f8608 100644 --- a/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/QuoteList.tsx +++ b/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/QuoteList.tsx @@ -57,11 +57,12 @@ const QuoteList = React.memo(function QuoteList({ return { ...item, q: currentFilterItem?.q || '', - a: currentFilterItem?.a || '' + a: currentFilterItem?.a || '', + imagePreviewUrl: currentFilterItem?.imagePreviewUrl }; } - return { ...item, q: item.q || '', a: item.a || '' }; + return { ...item, q: item.q || '' }; }); return processedData.sort((a, b) => { @@ -87,6 +88,7 @@ const QuoteList = React.memo(function QuoteList({ diff --git a/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/ResponseTags.tsx b/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/ResponseTags.tsx index 65f3580f8..4202b76fb 100644 --- a/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/ResponseTags.tsx +++ b/projects/app/src/components/core/chat/ChatContainer/ChatBox/components/ResponseTags.tsx @@ -81,7 +81,9 @@ const ResponseTags = ({ .map((item) => ({ sourceName: item.sourceName, sourceId: item.sourceId, - icon: getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }), + icon: item.imageId ? 
'core/dataset/imageFill' + : getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }), collectionId: item.collectionId, datasetId: item.datasetId })); diff --git a/projects/app/src/components/core/chat/components/WholeResponseModal.tsx b/projects/app/src/components/core/chat/components/WholeResponseModal.tsx index 2da6dbc8a..bcb91de9a 100644 --- a/projects/app/src/components/core/chat/components/WholeResponseModal.tsx +++ b/projects/app/src/components/core/chat/components/WholeResponseModal.tsx @@ -300,7 +300,7 @@ export const WholeResponseContent = ({ {activeModule.quoteList && activeModule.quoteList.length > 0 && ( } /> )} diff --git a/projects/app/src/components/core/dataset/QuoteItem.tsx b/projects/app/src/components/core/dataset/QuoteItem.tsx index 2fcb9803b..bbf79e9d2 100644 --- a/projects/app/src/components/core/dataset/QuoteItem.tsx +++ b/projects/app/src/components/core/dataset/QuoteItem.tsx @@ -8,7 +8,11 @@ import { useTranslation } from 'next-i18next'; import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; import dynamic from 'next/dynamic'; import MyBox from '@fastgpt/web/components/common/MyBox'; -import { SearchScoreTypeEnum, SearchScoreTypeMap } from '@fastgpt/global/core/dataset/constants'; +import { + DatasetCollectionTypeEnum, + SearchScoreTypeEnum, + SearchScoreTypeMap +} from '@fastgpt/global/core/dataset/constants'; import type { readCollectionSourceBody } from '@/pages/api/core/dataset/collection/read'; import Markdown from '@/components/Markdown'; @@ -88,11 +92,13 @@ export const formatScore = (score: ScoreItemType[]) => { const QuoteItem = ({ quoteItem, canViewSource, + canEditData, canEditDataset, ...RawSourceBoxProps }: { quoteItem: SearchDataResponseItemType; canViewSource?: boolean; + canEditData?: boolean; canEditDataset?: boolean; } & Omit) => { const { t } = useTranslation(); @@ -206,7 +212,7 @@ const QuoteItem = ({ {...RawSourceBoxProps} /> - {quoteItem.id && canEditDataset && ( + {quoteItem.id && canEditData && ( - {t('chat:to_dataset')} + {t('common:to_dataset')} )} diff --git a/projects/app/src/components/core/dataset/RawSourceBox.tsx b/projects/app/src/components/core/dataset/RawSourceBox.tsx index 0db958a33..2455d7093 100644 --- a/projects/app/src/components/core/dataset/RawSourceBox.tsx +++ b/projects/app/src/components/core/dataset/RawSourceBox.tsx @@ -3,20 +3,22 @@ import { Box, type BoxProps } from '@chakra-ui/react'; import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; import { useTranslation } from 'next-i18next'; import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource'; -import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils'; +import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils'; import MyIcon from '@fastgpt/web/components/common/Icon'; import type { readCollectionSourceBody } from '@/pages/api/core/dataset/collection/read'; +import type { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants'; type Props = BoxProps & readCollectionSourceBody & { + collectionType?: DatasetCollectionTypeEnum; sourceName?: string; - collectionId: string; sourceId?: string; canView?: boolean; }; const RawSourceBox = ({ sourceId, + collectionType, sourceName = '', canView = true, @@ -35,7 +37,10 @@ const RawSourceBox = ({ const canPreview = !!sourceId && canView; - const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]); + const icon = useMemo( + () => getCollectionIcon({ type: collectionType, 
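// --- Editor's sketch (not part of the patch) ---------------------------------
// RawSourceBox above now resolves its icon with getCollectionIcon, preferring
// an explicit collection type over guessing from the source name, and
// ResponseTags short-circuits to the imageFill icon when a quote has an
// imageId. The real helper lives in @fastgpt/global/core/dataset/utils and is
// not shown here; a plausible fallback chain (branch targets are assumptions):
function getCollectionIconSketch({
  type,
  sourceId,
  name
}: {
  type?: string;
  sourceId?: string;
  name: string;
}) {
  if (type === 'images') return 'core/dataset/imageFill'; // image collections
  if (!sourceId) return 'core/dataset/manualCollection'; // manually entered data
  return name.toLowerCase().endsWith('.csv')
    ? 'core/dataset/tableCollection'
    : 'core/dataset/fileCollection';
}
// -----------------------------------------------------------------------------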
sourceId, name: sourceName }), + [collectionType, sourceId, sourceName] + ); const read = getCollectionSourceAndOpen({ collectionId, appId, diff --git a/projects/app/src/global/core/dataset/api.d.ts b/projects/app/src/global/core/dataset/api.d.ts index 7c9aa0dbb..fe1cc1c5b 100644 --- a/projects/app/src/global/core/dataset/api.d.ts +++ b/projects/app/src/global/core/dataset/api.d.ts @@ -6,7 +6,7 @@ import type { APIFileServer, FeishuServer, YuqueServer -} from '@fastgpt/global/core/dataset/apiDataset'; +} from '@fastgpt/global/core/dataset/apiDataset/type'; import type { DatasetSearchModeEnum, DatasetTypeEnum @@ -17,6 +17,7 @@ import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants'; import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type'; +import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type'; import { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type'; import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants'; import { PermissionValueType } from '@fastgpt/global/support/permission/type'; @@ -31,9 +32,7 @@ export type CreateDatasetParams = { vectorModel?: string; agentModel?: string; vlmModel?: string; - apiServer?: APIFileServer; - feishuServer?: FeishuServer; - yuqueServer?: YuqueServer; + apiDatasetServer?: ApiDatasetServerType; }; export type RebuildEmbeddingProps = { diff --git a/projects/app/src/global/core/dataset/type.d.ts b/projects/app/src/global/core/dataset/type.d.ts index 41cd315e8..ddca53cb0 100644 --- a/projects/app/src/global/core/dataset/type.d.ts +++ b/projects/app/src/global/core/dataset/type.d.ts @@ -34,9 +34,11 @@ export type DatasetDataListItemType = { _id: string; datasetId: string; collectionId: string; - q: string; // embedding content - a: string; // bonus content + q?: string; + a?: string; + imageId?: string; + imageSize?: number; + imagePreviewUrl?: string; //image preview url chunkIndex?: number; updated?: boolean; - // indexes: DatasetDataSchemaType['indexes']; }; diff --git a/projects/app/src/pageComponents/account/team/OperationLog/index.tsx b/projects/app/src/pageComponents/account/team/OperationLog/index.tsx index 9e56cd0db..8a19a5f42 100644 --- a/projects/app/src/pageComponents/account/team/OperationLog/index.tsx +++ b/projects/app/src/pageComponents/account/team/OperationLog/index.tsx @@ -1,6 +1,5 @@ import { Box, - Button, Flex, Table, TableContainer, @@ -11,12 +10,11 @@ import { Tr, HStack } from '@chakra-ui/react'; -import { useState, useEffect, useMemo } from 'react'; +import { useState, useEffect, useMemo, useCallback } from 'react'; import { useTranslation } from 'next-i18next'; import MyBox from '@fastgpt/web/components/common/MyBox'; import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination'; import { getOperationLogs } from '@/web/support/user/team/operantionLog/api'; -import { TeamPermission } from '@fastgpt/global/support/permission/user/controller'; import { operationLogMap } from '@fastgpt/service/support/operationLog/constants'; import { OperationLogEventEnum } from '@fastgpt/global/support/operationLog/constants'; import { formatTime2YMDHMS } from '@fastgpt/global/common/string/time'; @@ -26,7 +24,8 @@ import MultipleSelect, { } from '@fastgpt/web/components/common/MySelect/MultipleSelect'; import Avatar from '@fastgpt/web/components/common/Avatar'; import { getTeamMembers } from '@/web/support/user/team/api'; -import { createMetadataProcessorMap, type MetadataProcessor } from './processors'; +import { 
specialProcessors } from './processors'; +import { defaultMetadataProcessor } from './processors/commonProcessor'; function OperationLogTable({ Tabs }: { Tabs: React.ReactNode }) { const { t } = useTranslation(); @@ -59,13 +58,14 @@ function OperationLogTable({ Tabs }: { Tabs: React.ReactNode }) { [t] ); - const processMetadataByEvent = useMemo(() => { - const metadataProcessorMap = createMetadataProcessorMap(); - return (event: string, metadata: any) => { - const processor = metadataProcessorMap[event as OperationLogEventEnum]; - return processor ? processor(metadata, t) : metadata; - }; - }, [t]); + const processMetadataByEvent = useCallback( + (event: string, metadata: any) => { + const defaultFormat = defaultMetadataProcessor(metadata, t); + const specialFormat = specialProcessors[event as OperationLogEventEnum]?.(defaultFormat, t); + return specialFormat || defaultFormat; + }, + [t] + ); const { data: operationLogs = [], @@ -182,7 +182,7 @@ function OperationLogTable({ Tabs }: { Tabs: React.ReactNode }) { {formatTime2YMDHMS(log.timestamp)} {t(i18nData.typeLabel)} - {t(i18nData.content, metadata as any) as string} + {t(i18nData.content as any, metadata)} ) : null; })} diff --git a/projects/app/src/pageComponents/account/team/OperationLog/processors/appProcessors.ts b/projects/app/src/pageComponents/account/team/OperationLog/processors/appProcessors.ts index a96cb8731..ac453b4f3 100644 --- a/projects/app/src/pageComponents/account/team/OperationLog/processors/appProcessors.ts +++ b/projects/app/src/pageComponents/account/team/OperationLog/processors/appProcessors.ts @@ -1,5 +1,4 @@ import { AppPermission } from '@fastgpt/global/support/permission/app/controller'; -import { createSpecialProcessor } from './commonProcessor'; export const processUpdateAppCollaboratorSpecific = (metadata: any) => { const permissionValue = parseInt(metadata.permission, 10); @@ -12,6 +11,6 @@ export const processUpdateAppCollaboratorSpecific = (metadata: any) => { }; }; -export const createAppProcessors = () => ({ - UPDATE_APP_COLLABORATOR: createSpecialProcessor(processUpdateAppCollaboratorSpecific) -}); +export const createAppProcessors = { + UPDATE_APP_COLLABORATOR: processUpdateAppCollaboratorSpecific +}; diff --git a/projects/app/src/pageComponents/account/team/OperationLog/processors/commonProcessor.ts b/projects/app/src/pageComponents/account/team/OperationLog/processors/commonProcessor.ts index 4ea243cb3..a4556a628 100644 --- a/projects/app/src/pageComponents/account/team/OperationLog/processors/commonProcessor.ts +++ b/projects/app/src/pageComponents/account/team/OperationLog/processors/commonProcessor.ts @@ -10,13 +10,11 @@ export interface CommonMetadataFields { export const defaultMetadataProcessor = (metadata: CommonMetadataFields, t: any): any => { const result = { ...metadata }; - const translatableFields = ['appType', 'datasetType', 'operationName', 'itemName']; - - Object.entries(metadata) - .filter(([key, value]) => translatableFields.includes(key) && value) - .forEach(([key, value]) => { + Object.entries(metadata).forEach(([key, value]) => { + if (typeof value === 'string' && value.includes(':')) { result[key] = t(value as any); - }); + } + }); if (metadata.newItemNames) { if (Array.isArray(metadata.newItemNames)) { @@ -33,11 +31,3 @@ export const defaultMetadataProcessor = (metadata: CommonMetadataFields, t: any) return result; }; - -export const createSpecialProcessor = (specificProcessor: (metadata: any) => any) => { - return (metadata: any, t: any) => { - let processedMetadata = 
defaultMetadataProcessor(metadata, t);
-    processedMetadata = specificProcessor(processedMetadata);
-    return processedMetadata;
-  };
-};
diff --git a/projects/app/src/pageComponents/account/team/OperationLog/processors/datasetProcessors.ts b/projects/app/src/pageComponents/account/team/OperationLog/processors/datasetProcessors.ts
index c9c30bda6..968b17657 100644
--- a/projects/app/src/pageComponents/account/team/OperationLog/processors/datasetProcessors.ts
+++ b/projects/app/src/pageComponents/account/team/OperationLog/processors/datasetProcessors.ts
@@ -1,5 +1,4 @@
 import { DatasetPermission } from '@fastgpt/global/support/permission/dataset/controller';
-import { createSpecialProcessor } from './commonProcessor';

 export const processUpdateDatasetCollaboratorSpecific = (metadata: any) => {
   const permissionValue = parseInt(metadata.permission, 10);
@@ -12,6 +11,6 @@ export const processUpdateDatasetCollaboratorSpecific = (metadata: any) => {
   };
 };

-export const createDatasetProcessors = () => ({
-  UPDATE_DATASET_COLLABORATOR: createSpecialProcessor(processUpdateDatasetCollaboratorSpecific)
-});
+export const createDatasetProcessors = {
+  UPDATE_DATASET_COLLABORATOR: processUpdateDatasetCollaboratorSpecific
+};
diff --git a/projects/app/src/pageComponents/account/team/OperationLog/processors/index.ts b/projects/app/src/pageComponents/account/team/OperationLog/processors/index.ts
index 10981efb4..baaa4e2fc 100644
--- a/projects/app/src/pageComponents/account/team/OperationLog/processors/index.ts
+++ b/projects/app/src/pageComponents/account/team/OperationLog/processors/index.ts
@@ -1,30 +1,11 @@
-import { OperationLogEventEnum } from '@fastgpt/global/support/operationLog/constants';
-import { defaultMetadataProcessor } from './commonProcessor';
+import type { OperationLogEventEnum } from '@fastgpt/global/support/operationLog/constants';
 import { createTeamProcessors } from './teamProcessors';
 import { createAppProcessors } from './appProcessors';
 import { createDatasetProcessors } from './datasetProcessors';

 export type MetadataProcessor = (metadata: any, t: any) => any;
-
-export const createMetadataProcessorMap = (): Record<OperationLogEventEnum, MetadataProcessor> => {
-  const specialProcessors: Partial<Record<OperationLogEventEnum, MetadataProcessor>> = {
-    ...createTeamProcessors(),
-    ...createAppProcessors(),
-    ...createDatasetProcessors()
-  };
-
-  const processorMap = {} as Record<OperationLogEventEnum, MetadataProcessor>;
-
-  Object.values(OperationLogEventEnum).forEach((event) => {
-    processorMap[event] =
-      specialProcessors[event] ||
-      ((metadata: any, t: any) => defaultMetadataProcessor(metadata, t));
-  });
-
-  return processorMap;
+export const specialProcessors: Partial<Record<OperationLogEventEnum, MetadataProcessor>> = {
+  ...createTeamProcessors,
+  ...createAppProcessors,
+  ...createDatasetProcessors
 };
-
-export * from './commonProcessor';
-export * from './teamProcessors';
-export * from './appProcessors';
-export * from './datasetProcessors';
diff --git a/projects/app/src/pageComponents/account/team/OperationLog/processors/teamProcessors.ts b/projects/app/src/pageComponents/account/team/OperationLog/processors/teamProcessors.ts
index 928a8c3c1..147d1b733 100644
--- a/projects/app/src/pageComponents/account/team/OperationLog/processors/teamProcessors.ts
+++ b/projects/app/src/pageComponents/account/team/OperationLog/processors/teamProcessors.ts
@@ -1,5 +1,4 @@
 import { TeamPermission } from '@fastgpt/global/support/permission/user/controller';
-import { createSpecialProcessor } from './commonProcessor';

 export const processAssignPermissionSpecific = (metadata: any) => {
   const permissionValue = parseInt(metadata.permission, 10);
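The processors/index.ts change above drops the eagerly built per-event map in favor of a sparse `specialProcessors` table consulted with a default fallback. A minimal sketch of the resulting dispatch, with `TFn` and `Metadata` as simplified stand-ins for the project's translator and metadata types (not the real signatures):

// Sketch only: mirrors the lookup-with-fallback now done in OperationLog/index.tsx.
type TFn = (key: string, vars?: Record<string, any>) => string;
type Metadata = Record<string, any>;
type MetadataProcessor = (metadata: Metadata, t: TFn) => Metadata;

declare const specialProcessors: Partial<Record<string, MetadataProcessor>>;
declare const defaultMetadataProcessor: MetadataProcessor;

function processMetadataByEvent(event: string, metadata: Metadata, t: TFn): Metadata {
  // Every event first gets the shared normalization pass...
  const defaultFormat = defaultMetadataProcessor(metadata, t);
  // ...then an event-specific processor may refine the result; otherwise the default stands.
  const specialFormat = specialProcessors[event]?.(defaultFormat, t);
  return specialFormat || defaultFormat;
}

@@ -14,6 +13,6 @@ export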
const processAssignPermissionSpecific = (metadata: any) => { }; }; -export const createTeamProcessors = () => ({ - ASSIGN_PERMISSION: createSpecialProcessor(processAssignPermissionSpecific) -}); +export const createTeamProcessors = { + ASSIGN_PERMISSION: processAssignPermissionSpecific +}; diff --git a/projects/app/src/pageComponents/chat/ChatQuoteList/CollectionQuoteReader.tsx b/projects/app/src/pageComponents/chat/ChatQuoteList/CollectionQuoteReader.tsx index fe811bb75..ae3f37215 100644 --- a/projects/app/src/pageComponents/chat/ChatQuoteList/CollectionQuoteReader.tsx +++ b/projects/app/src/pageComponents/chat/ChatQuoteList/CollectionQuoteReader.tsx @@ -1,5 +1,8 @@ import { Box, Flex, HStack } from '@chakra-ui/react'; -import { type SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type'; +import { + type DatasetCiteItemType, + type SearchDataResponseItemType +} from '@fastgpt/global/core/dataset/type'; import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils'; import MyIcon from '@fastgpt/web/components/common/Icon'; import { useRouter } from 'next/router'; @@ -20,7 +23,6 @@ import { getCollectionQuote } from '@/web/core/chat/api'; import MyIconButton from '@fastgpt/web/components/common/Icon/button'; import MyBox from '@fastgpt/web/components/common/MyBox'; import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource'; -import { type QuoteDataItemType } from '@/service/core/chat/constants'; const CollectionReader = ({ rawSearch, @@ -99,7 +101,7 @@ const CollectionReader = ({ const formatedDataList = useMemo( () => - datasetDataList.map((item: QuoteDataItemType) => { + datasetDataList.map((item: DatasetCiteItemType) => { const isCurrentSelected = currentQuoteItem?.id === item._id; const quoteIndex = filterResults.findIndex((res) => res.id === item._id); diff --git a/projects/app/src/pageComponents/dataset/ApiDatasetForm.tsx b/projects/app/src/pageComponents/dataset/ApiDatasetForm.tsx index 3588b4742..6447c4091 100644 --- a/projects/app/src/pageComponents/dataset/ApiDatasetForm.tsx +++ b/projects/app/src/pageComponents/dataset/ApiDatasetForm.tsx @@ -3,11 +3,6 @@ import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants'; import { Flex, Input, Button, ModalBody, ModalFooter, Box } from '@chakra-ui/react'; import type { UseFormReturn } from 'react-hook-form'; import { useTranslation } from 'next-i18next'; -import type { - APIFileServer, - FeishuServer, - YuqueServer -} from '@fastgpt/global/core/dataset/apiDataset'; import { getApiDatasetPaths, getApiDatasetCatalog } from '@/web/core/dataset/api'; import type { GetResourceFolderListItemResponse, @@ -22,6 +17,7 @@ import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel'; import MyModal from '@fastgpt/web/components/common/MyModal'; import MyIcon from '@fastgpt/web/components/common/Icon'; import { FolderIcon } from '@fastgpt/global/common/file/image/constants'; +import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type'; const ApiDatasetForm = ({ type, @@ -32,9 +28,7 @@ const ApiDatasetForm = ({ datasetId?: string; form: UseFormReturn< { - apiServer?: APIFileServer; - feishuServer?: FeishuServer; - yuqueServer?: YuqueServer; + apiDatasetServer?: ApiDatasetServerType; }, any >; @@ -42,9 +36,10 @@ const ApiDatasetForm = ({ const { t } = useTranslation(); const { register, setValue, watch } = form; - const yuqueServer = watch('yuqueServer'); - const feishuServer = watch('feishuServer'); - const apiServer = watch('apiServer'); + 
const apiDatasetServer = watch('apiDatasetServer'); + const yuqueServer = apiDatasetServer?.yuqueServer; + const feishuServer = apiDatasetServer?.feishuServer; + const apiServer = apiDatasetServer?.apiServer; const [pathNames, setPathNames] = useState(t('dataset:rootdirectory')); const [ @@ -91,9 +86,7 @@ const ApiDatasetForm = ({ const path = await getApiDatasetPaths({ datasetId, parentId, - yuqueServer, - feishuServer, - apiServer + apiDatasetServer }); setPathNames(path); }, @@ -108,13 +101,13 @@ const ApiDatasetForm = ({ const value = id === 'root' || id === null || id === undefined ? '' : id; switch (type) { case DatasetTypeEnum.yuque: - setValue('yuqueServer.basePath', value); + setValue('apiDatasetServer.yuqueServer.basePath', value); break; case DatasetTypeEnum.feishu: - setValue('feishuServer.folderToken', value); + setValue('apiDatasetServer.feishuServer.folderToken', value); break; case DatasetTypeEnum.apiDataset: - setValue('apiServer.basePath', value); + setValue('apiDatasetServer.apiServer.basePath', value); break; } @@ -147,32 +140,10 @@ const ApiDatasetForm = ({ { - const params: GetApiDatasetCataLogProps = { parentId: e.parentId }; - - switch (type) { - case DatasetTypeEnum.yuque: - params.yuqueServer = { - userId: yuqueServer?.userId || '', - token: yuqueServer?.token || '', - basePath: '' - }; - break; - // Currently, only Yuque is using it - case DatasetTypeEnum.feishu: - params.feishuServer = { - appId: feishuServer?.appId || '', - appSecret: feishuServer?.appSecret || '', - folderToken: feishuServer?.folderToken || '' - }; - break; - case DatasetTypeEnum.apiDataset: - params.apiServer = { - baseUrl: apiServer?.baseUrl || '', - authorization: apiServer?.authorization || '', - basePath: '' - }; - break; - } + const params: GetApiDatasetCataLogProps = { + parentId: e.parentId, + apiDatasetServer + }; return getApiDatasetCatalog(params); }} @@ -193,7 +164,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={t('dataset:api_url')} maxLength={200} - {...register('apiServer.baseUrl', { required: true })} + {...register('apiDatasetServer.apiServer.baseUrl', { required: true })} /> @@ -204,7 +175,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={t('dataset:request_headers')} maxLength={2000} - {...register('apiServer.authorization')} + {...register('apiDatasetServer.apiServer.authorization')} /> {renderBaseUrlSelector()} @@ -227,7 +198,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={'App ID'} maxLength={200} - {...register('feishuServer.appId', { required: true })} + {...register('apiDatasetServer.feishuServer.appId', { required: true })} /> @@ -244,7 +215,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={'App Secret'} maxLength={200} - {...register('feishuServer.appSecret', { required: true })} + {...register('apiDatasetServer.feishuServer.appSecret', { required: true })} /> @@ -261,7 +232,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={'Folder Token'} maxLength={200} - {...register('feishuServer.folderToken', { required: true })} + {...register('apiDatasetServer.feishuServer.folderToken', { required: true })} /> {/* {renderBaseUrlSelector()} @@ -278,7 +249,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={'User ID'} maxLength={200} - {...register('yuqueServer.userId', { required: true })} + {...register('apiDatasetServer.yuqueServer.userId', { required: true })} /> @@ -289,7 +260,7 @@ const ApiDatasetForm = ({ bg={'myWhite.600'} placeholder={'Token'} maxLength={200} - 
{...register('yuqueServer.token', { required: true })} + {...register('apiDatasetServer.yuqueServer.token', { required: true })} /> {renderBaseUrlSelector()} diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/BackupImportModal.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/BackupImportModal.tsx index 267ff3b2c..a2b6c9c86 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/BackupImportModal.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/BackupImportModal.tsx @@ -48,9 +48,9 @@ const BackupImportModal = ({ setSelectFiles(e)} /> {/* File render */} {selectFiles.length > 0 && ( @@ -82,8 +82,8 @@ const BackupImportModal = ({ diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx index 6f5c857e0..4633ff85a 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx @@ -17,7 +17,8 @@ import { DatasetCollectionTypeEnum, DatasetTypeEnum, DatasetTypeMap, - DatasetStatusEnum + DatasetStatusEnum, + ApiDatasetTypeMap } from '@fastgpt/global/core/dataset/constants'; import EditFolderModal, { useEditFolder } from '../../EditFolderModal'; import { TabEnum } from '../../../../pages/dataset/detail/index'; @@ -37,6 +38,7 @@ import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip'; const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector')); const BackupImportModal = dynamic(() => import('./BackupImportModal')); +const TemplateImportModal = dynamic(() => import('./TemplateImportModal')); const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { const { t } = useTranslation(); @@ -83,6 +85,12 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { onOpen: onOpenBackupImportModal, onClose: onCloseBackupImportModal } = useDisclosure(); + // Template import modal + const { + isOpen: isOpenTemplateImportModal, + onOpen: onOpenTemplateImportModal, + onClose: onCloseTemplateImportModal + } = useDisclosure(); const { runAsync: onCreateCollection } = useRequest2( async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => { @@ -224,6 +232,15 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { menuList={[ { children: [ + { + label: ( + + + {t('common:Folder')} + + ), + onClick: () => setEditFolderData({}) + }, { label: ( @@ -233,11 +250,32 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { ), onClick: onOpenFileSourceSelector }, + ...(feConfigs?.isPlus + ? 
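The ApiDatasetForm changes above are part of folding the separate `apiServer`, `feishuServer`, and `yuqueServer` props into a single `apiDatasetServer` value. The shape below is inferred from the fields the form registers and is only a sketch; the authoritative definition lives in `@fastgpt/global/core/dataset/apiDataset/type`:

// Inferred from usage in ApiDatasetForm; field optionality is an assumption.
type APIFileServer = { baseUrl: string; authorization?: string; basePath?: string };
type FeishuServer = { appId: string; appSecret: string; folderToken: string };
type YuqueServer = { userId: string; token: string; basePath?: string };

type ApiDatasetServerType = {
  apiServer?: APIFileServer;
  feishuServer?: FeishuServer;
  yuqueServer?: YuqueServer;
};

// Callers now pass one optional bundle instead of three parallel fields,
// matching the simplified catalog request built in the diff above:
type GetApiDatasetCataLogProps = {
  parentId?: string | null;
  apiDatasetServer?: ApiDatasetServerType;
};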
[ + { + label: ( + + + {t('dataset:core.dataset.Image collection')} + + ), + onClick: () => + router.replace({ + query: { + ...router.query, + currentTab: TabEnum.import, + source: ImportDataSourceEnum.imageDataset + } + }) + } + ] + : []), + { label: ( - {t('common:core.dataset.Manual collection')} + {t('dataset:empty_collection')} ), onClick: () => { @@ -247,6 +285,19 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { onCreateCollection({ name, type: DatasetCollectionTypeEnum.virtual }) }); } + } + ] + }, + { + children: [ + { + label: ( + + + {t('dataset:template_dataset')} + + ), + onClick: onOpenTemplateImportModal }, { label: ( @@ -258,19 +309,6 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { onClick: onOpenBackupImportModal } ] - }, - { - children: [ - { - label: ( - - - {t('common:Folder')} - - ), - onClick: () => setEditFolderData({}) - } - ] } ]} /> @@ -415,9 +453,7 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => { /> )} {/* apiDataset */} - {(datasetDetail?.type === DatasetTypeEnum.apiDataset || - datasetDetail?.type === DatasetTypeEnum.feishu || - datasetDetail?.type === DatasetTypeEnum.yuque) && ( + {datasetDetail?.type && ApiDatasetTypeMap[datasetDetail.type] && ( { name={editFolderData.name} /> )} - + {isOpenFileSourceSelector && } {isOpenBackupImportModal && ( { onClose={onCloseBackupImportModal} /> )} + {isOpenTemplateImportModal && ( + { + getData(1); + }} + onClose={onCloseTemplateImportModal} + /> + )} ); }; diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/TagManageModal.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TagManageModal.tsx index c4a51a8a0..e032f8f27 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/TagManageModal.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TagManageModal.tsx @@ -421,7 +421,7 @@ const AddTagToCollections = ({ () => collectionsList.map((item) => { const collection = item.data; - const icon = getCollectionIcon(collection.type, collection.name); + const icon = getCollectionIcon({ type: collection.type, name: collection.name }); return { id: collection._id, tags: collection.tags, diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/TemplateImportModal.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TemplateImportModal.tsx new file mode 100644 index 000000000..53c846ea5 --- /dev/null +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TemplateImportModal.tsx @@ -0,0 +1,160 @@ +import React, { useState } from 'react'; +import MyModal from '@fastgpt/web/components/common/MyModal'; +import { useTranslation } from 'next-i18next'; +import { Box, Button, HStack, ModalBody, ModalFooter, VStack, Flex, Link } from '@chakra-ui/react'; +import FileSelector, { type SelectFileItemType } from '../components/FileSelector'; +import MyIcon from '@fastgpt/web/components/common/Icon'; +import MyIconButton from '@fastgpt/web/components/common/Icon/button'; +import { postTemplateDatasetCollection } from '@/web/core/dataset/api'; +import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; +import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; +import { useContextSelector } from 'use-context-selector'; +import { getDocPath } from '@/web/common/system/doc'; +import { Trans } from 'next-i18next'; + +const TemplateImportModal = ({ + onFinish, + onClose +}: { + onFinish: () => void; + onClose: () => void; +}) => 
{ + const { t } = useTranslation(); + const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId); + + const [selectFiles, setSelectFiles] = useState([]); + const [percent, setPercent] = useState(0); + + const { runAsync: onImport, loading: isImporting } = useRequest2( + async () => { + await postTemplateDatasetCollection({ + datasetId, + file: selectFiles[0].file, + percentListen: setPercent + }); + }, + { + onSuccess() { + onFinish(); + onClose(); + }, + successToast: t('common:import_success') + } + ); + + const handleDownloadTemplate = () => { + const templateContent = `q,a,indexes +"Who are you?","I am an AI assistant, here to help with your questions and provide support. I can assist with learning, daily life queries, and creative ideas.","1. What are you?\n2. What can you do?\n3. What topics can you help with?\n4. How do you assist users?\n5. What's your goal?","Who are you? I am an AI assistant..." +"What are you?","I am an AI assistant designed to help users with their questions and provide support across various topics.","What are you?","I am an AI assistant..."`; + + const blob = new Blob([templateContent], { type: 'text/csv;charset=utf-8;' }); + const link = document.createElement('a'); + + const url = URL.createObjectURL(blob); + link.setAttribute('href', url); + link.setAttribute('download', 'template.csv'); + link.style.visibility = 'hidden'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + }; + + return ( + + + + + {t('dataset:upload_by_template_format')} + + + {t('common:Instructions')} + + + + + + + + }} + /> + + } + /> + + {/* File render */} + {selectFiles.length > 0 && ( + + {selectFiles.map((item, index) => ( + + + {item.name} + + {item.size} + + { + setSelectFiles(selectFiles.filter((_, i) => i !== index)); + }} + /> + + ))} + + )} + + + + + + + + ); +}; + +export default TemplateImportModal; diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/TrainingStates.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TrainingStates.tsx index 03f8eeae0..c642aefa1 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/TrainingStates.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/TrainingStates.tsx @@ -35,6 +35,8 @@ import { useForm } from 'react-hook-form'; import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail'; import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination'; import EmptyTip from '@fastgpt/web/components/common/EmptyTip'; +import MyImage from '@/components/MyImage'; +import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel'; enum TrainingStatus { NotStart = 'NotStart', @@ -48,6 +50,8 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes const { t } = useTranslation(); const isQA = trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.qa; + const isImageParse = + trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.imageParse; /* 状态计算 @@ -61,7 +65,10 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes Object.values(trainingDetail.trainingCounts).every((count) => count === 0) && Object.values(trainingDetail.errorCounts).every((count) => count === 0); + const isContentParsing = trainingDetail.trainingCounts.parse > 0; + const getTrainingStatus = ({ errorCount }: { errorCount: number }) => { + if (isContentParsing) return 
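The TemplateImportModal above hands `percentListen: setPercent` to `postTemplateDatasetCollection` so the upload can drive a progress state. A hypothetical sketch of how such a callback is typically wired into the request layer; the axios wiring here is an assumption, not FastGPT's actual client code:

import axios from 'axios';

// `percentListen` matches the prop name used above; everything else is illustrative.
async function uploadWithProgress(
  url: string,
  formData: FormData,
  percentListen?: (percent: number) => void
) {
  return axios.post(url, formData, {
    onUploadProgress(e) {
      // Report whole-number percentages only when the total size is known.
      if (!e.total || !percentListen) return;
      percentListen(Math.round((e.loaded / e.total) * 100));
    }
  });
}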
TrainingStatus.NotStart; if (isReady) return TrainingStatus.Ready; if (errorCount > 0) { return TrainingStatus.Error; @@ -92,29 +99,40 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes status: TrainingStatus; errorCount: number; }[] = [ - // { - // label: TrainingProcess.waiting.label, - // status: TrainingStatus.Queued, - // statusText: t('dataset:dataset.Completed') - // }, { label: t(TrainingProcess.parsing.label), - status: TrainingStatus.Ready, - errorCount: 0 + status: (() => { + if (trainingDetail.errorCounts.parse > 0) return TrainingStatus.Error; + if (isContentParsing) return TrainingStatus.Running; + return TrainingStatus.Ready; + })(), + errorCount: trainingDetail.errorCounts.parse }, - ...(isQA + ...(isImageParse ? [ { - errorCount: trainingDetail.errorCounts.qa, - label: t(TrainingProcess.getQA.label), - statusText: getStatusText(TrainingModeEnum.qa), + errorCount: trainingDetail.errorCounts.imageParse, + label: t(TrainingProcess.parseImage.label), + statusText: getStatusText(TrainingModeEnum.imageParse), status: getTrainingStatus({ - errorCount: trainingDetail.errorCounts.qa + errorCount: trainingDetail.errorCounts.imageParse }) } ] : []), - ...(trainingDetail?.advancedTraining.imageIndex && !isQA + ...(isQA + ? [ + { + label: t(TrainingProcess.getQA.label), + statusText: getStatusText(TrainingModeEnum.qa), + status: getTrainingStatus({ + errorCount: trainingDetail.errorCounts.qa + }), + errorCount: trainingDetail.errorCounts.qa + } + ] + : []), + ...(trainingDetail?.advancedTraining.imageIndex ? [ { errorCount: trainingDetail.errorCounts.image, @@ -126,7 +144,7 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes } ] : []), - ...(trainingDetail?.advancedTraining.autoIndexes && !isQA + ...(trainingDetail?.advancedTraining.autoIndexes ? 
[ { errorCount: trainingDetail.errorCounts.auto, @@ -159,7 +177,17 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes ]; return states; - }, [trainingDetail, t, isQA]); + }, [ + trainingDetail.queuedCounts, + trainingDetail.trainingCounts, + trainingDetail.errorCounts, + trainingDetail?.advancedTraining.imageIndex, + trainingDetail?.advancedTraining.autoIndexes, + trainingDetail.trainedCount, + t, + isImageParse, + isQA + ]); return ( @@ -254,11 +282,21 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes ); }; -const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionId: string }) => { +const ErrorView = ({ + datasetId, + collectionId, + refreshTrainingDetail +}: { + datasetId: string; + collectionId: string; + refreshTrainingDetail: () => void; +}) => { const { t } = useTranslation(); const TrainingText = { + [TrainingModeEnum.parse]: t('dataset:process.Parsing'), [TrainingModeEnum.chunk]: t('dataset:process.Vectorizing'), [TrainingModeEnum.qa]: t('dataset:process.Get QA'), + [TrainingModeEnum.imageParse]: t('dataset:process.Image_Index'), [TrainingModeEnum.image]: t('dataset:process.Image_Index'), [TrainingModeEnum.auto]: t('dataset:process.Auto_Index') }; @@ -308,6 +346,7 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI manual: true, onSuccess: () => { refreshList(); + refreshTrainingDetail(); setEditChunk(undefined); } } @@ -316,6 +355,7 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI if (editChunk) { return ( setEditChunk(undefined)} onSave={(data) => { @@ -401,10 +441,12 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI }; const EditView = ({ + loading, editChunk, onCancel, onSave }: { + loading: boolean; editChunk: getTrainingDataDetailResponse; onCancel: () => void; onSave: (data: { q: string; a?: string }) => void; @@ -419,20 +461,41 @@ const EditView = ({ return ( - {editChunk?.a && q} - + {editChunk?.imagePreviewUrl && ( + + {t('file:image')} + + + + + )} + + + {(editChunk?.a || editChunk?.imagePreviewUrl) && ( + + {editChunk?.a + ? 
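The ProgressView changes above derive each training step's status from the queue and error counters, with the new content-parsing stage gating everything downstream. A condensed sketch of that derivation, using simplified inputs rather than the component's real `trainingDetail` shape (the real component also distinguishes queued from running per training mode):

enum TrainingStatus {
  NotStart = 'NotStart',
  Ready = 'Ready',
  Error = 'Error',
  Running = 'Running'
}

function getStepStatus(opts: {
  errorCount: number;
  isContentParsing: boolean; // raw-content parsing still has queued items
  isReady: boolean; // all queued/training/error counters are zero
}): TrainingStatus {
  if (opts.isContentParsing) return TrainingStatus.NotStart; // later steps wait on parsing
  if (opts.isReady) return TrainingStatus.Ready;
  if (opts.errorCount > 0) return TrainingStatus.Error;
  return TrainingStatus.Running;
}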
t('common:dataset_data_input_chunk_content') + : t('common:dataset_data_input_q')} + + )} + + + {editChunk?.a && ( - <> - a + + {t('common:dataset_data_input_a')} - + )} - @@ -453,14 +516,15 @@ const TrainingStates = ({ const { t } = useTranslation(); const [tab, setTab] = useState(defaultTab); - const { data: trainingDetail, loading } = useRequest2( - () => getDatasetCollectionTrainingDetail(collectionId), - { - pollingInterval: 5000, - pollingWhenHidden: false, - manual: false - } - ); + const { + data: trainingDetail, + loading, + runAsync: refreshTrainingDetail + } = useRequest2(() => getDatasetCollectionTrainingDetail(collectionId), { + pollingInterval: 5000, + pollingWhenHidden: false, + manual: false + }); const errorCounts = (Object.values(trainingDetail?.errorCounts || {}) as number[]).reduce( (acc, count) => acc + count, @@ -493,7 +557,13 @@ const TrainingStates = ({ ]} /> {tab === 'states' && trainingDetail && } - {tab === 'errors' && } + {tab === 'errors' && ( + + )} ); diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/index.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/index.tsx index 33a7de218..5aeeec4eb 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/index.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/index.tsx @@ -75,7 +75,7 @@ const CollectionCard = () => { const formatCollections = useMemo( () => collections.map((collection) => { - const icon = getCollectionIcon(collection.type, collection.name); + const icon = getCollectionIcon({ type: collection.type, name: collection.name }); const status = (() => { if (collection.hasError) { return { diff --git a/projects/app/src/pageComponents/dataset/detail/DataCard.tsx b/projects/app/src/pageComponents/dataset/detail/DataCard.tsx index 9e54d1493..28e94c8c5 100644 --- a/projects/app/src/pageComponents/dataset/detail/DataCard.tsx +++ b/projects/app/src/pageComponents/dataset/detail/DataCard.tsx @@ -1,5 +1,5 @@ import React, { useState, useMemo } from 'react'; -import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react'; +import { Box, Card, IconButton, Flex, Button, useTheme, Image } from '@chakra-ui/react'; import { getDatasetDataList, delOneDatasetDataById, @@ -24,28 +24,36 @@ import TagsPopOver from './CollectionCard/TagsPopOver'; import { useSystemStore } from '@/web/common/system/useSystemStore'; import MyDivider from '@fastgpt/web/components/common/MyDivider'; import Markdown from '@/components/Markdown'; -import { useMemoizedFn } from 'ahooks'; +import { useBoolean, useMemoizedFn } from 'ahooks'; import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination'; import { TabEnum } from './NavBar'; import { - DatasetCollectionDataProcessModeEnum, + DatasetCollectionTypeEnum, ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants'; import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; import TrainingStates from './CollectionCard/TrainingStates'; import { getTextValidLength } from '@fastgpt/global/common/string/utils'; import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConfirm'; +import { formatFileSize } from '@fastgpt/global/common/file/tools'; +import MyImage from '@fastgpt/web/components/common/Image/MyImage'; +import dynamic from 'next/dynamic'; + +const InsertImagesModal = dynamic(() => import('./data/InsertImageModal'), { + ssr: false +}); const DataCard = () => { - const theme = useTheme(); const router = useRouter(); const { isPc } = useSystem(); 
- const { collectionId = '', datasetId } = router.query as { + const { feConfigs } = useSystemStore(); + + const { collectionId = '' } = router.query as { collectionId: string; datasetId: string; }; const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail); - const { feConfigs } = useSystemStore(); + const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId); const { t } = useTranslation(); const [searchText, setSearchText] = useState(''); @@ -78,21 +86,30 @@ const DataCard = () => { const [editDataId, setEditDataId] = useState(); - // get file info - const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), { - refreshDeps: [collectionId], - manual: false, - onError: () => { - router.replace({ - query: { - datasetId - } - }); + // Get collection info + const { data: collection, runAsync: reloadCollection } = useRequest2( + () => getDatasetCollectionById(collectionId), + { + refreshDeps: [collectionId], + manual: false, + onError: () => { + router.replace({ + query: { + datasetId + } + }); + } } - }); + ); const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]); + const [ + isInsertImagesModalOpen, + { setTrue: openInsertImagesModal, setFalse: closeInsertImagesModal } + ] = useBoolean(); + const isImageCollection = collection?.type === DatasetCollectionTypeEnum.images; + const onDeleteOneData = useMemoizedFn(async (dataId: string) => { try { await delOneDatasetDataById(dataId); @@ -125,6 +142,7 @@ const DataCard = () => { > {collection?._id && ( { {t('dataset:retain_collection')} )} - {canWrite && ( + {canWrite && !isImageCollection && ( )} + {canWrite && isImageCollection && ( + + )} @@ -236,7 +265,7 @@ const DataCard = () => { userSelect={'none'} boxShadow={'none'} bg={index % 2 === 1 ? 'myGray.50' : 'blue.50'} - border={theme.borders.sm} + border={'sm'} position={'relative'} overflow={'hidden'} _hover={{ @@ -282,17 +311,35 @@ const DataCard = () => { {/* Data content */} - - - {!!item.a && ( - <> - - - - )} - + {item.imagePreviewUrl ? ( + + + + + + + + + ) : ( + + + {!!item.a && ( + <> + + + + )} + + )} - {/* Mask */} + {/* Footer */} { py={1} mr={2} > - - {getTextValidLength(item.q + item.a || '')} + {item.imageSize ? 
( + <>{formatFileSize(item.imageSize)} + ) : ( + <> + + {getTextValidLength((item?.q || '') + (item?.a || ''))} + + )} {canWrite && ( { collectionId={collection._id} dataId={editDataId} onClose={() => setEditDataId(undefined)} - onSuccess={(data) => { + onSuccess={(data: any) => { if (editDataId === '') { refreshList(); return; @@ -386,9 +439,16 @@ const DataCard = () => { datasetId={datasetId} defaultTab={'errors'} collectionId={errorModalId} - onClose={() => setErrorModalId('')} + onClose={() => { + setErrorModalId(''); + refreshList(); + reloadCollection(); + }} /> )} + {isInsertImagesModalOpen && ( + + )} ); }; diff --git a/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx b/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx index 13d7f3b96..a55ccf4c7 100644 --- a/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx @@ -173,6 +173,20 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode { title: t('dataset:import_confirm') } + ], + [ImportDataSourceEnum.imageDataset]: [ + { + title: t('dataset:import_select_file') + }, + { + title: t('dataset:import_param_setting') + }, + { + title: t('dataset:import_data_preview') + }, + { + title: t('dataset:import_confirm') + } ] }; const steps = modeSteps[source]; @@ -238,20 +252,22 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode {/* step */} - - - + {source !== ImportDataSourceEnum.imageDataset && ( + + + + - + )} {children} ); diff --git a/projects/app/src/pageComponents/dataset/detail/Import/components/FileSelector.tsx b/projects/app/src/pageComponents/dataset/detail/Import/components/FileSelector.tsx index a4b640f33..884a1036b 100644 --- a/projects/app/src/pageComponents/dataset/detail/Import/components/FileSelector.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Import/components/FileSelector.tsx @@ -7,15 +7,8 @@ import MyIcon from '@fastgpt/web/components/common/Icon'; import { useTranslation } from 'next-i18next'; import React, { type DragEvent, useCallback, useMemo, useState } from 'react'; import { getNanoid } from '@fastgpt/global/common/string/tools'; -import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; -import { getFileIcon } from '@fastgpt/global/common/file/icon'; import { useSystemStore } from '@/web/common/system/useSystemStore'; -import { uploadFile2DB } from '@/web/common/file/controller'; -import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; import type { ImportSourceItemType } from '@/web/core/dataset/type'; -import { useContextSelector } from 'use-context-selector'; -import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; -import { getErrText } from '@fastgpt/global/common/error/utils'; export type SelectFileItemType = { fileId: string; @@ -26,23 +19,18 @@ export type SelectFileItemType = { const FileSelector = ({ fileType, selectFiles, - setSelectFiles, - onStartSelect, - onFinishSelect, + onSelectFiles, ...props }: { fileType: string; selectFiles: ImportSourceItemType[]; - setSelectFiles: React.Dispatch>; - onStartSelect: () => void; - onFinishSelect: () => void; + onSelectFiles: (e: SelectFileItemType[]) => any; } & FlexProps) => { const { t } = useTranslation(); const { toast } = useToast(); const { feConfigs } = useSystemStore(); - const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId); const maxCount = feConfigs?.uploadFileMaxAmount || 1000; const maxSize = 
(feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024; @@ -65,90 +53,6 @@ const FileSelector = ({ 'i' ); - const { runAsync: onSelectFile, loading: isLoading } = useRequest2( - async (files: SelectFileItemType[]) => { - { - await Promise.all( - files.map(async ({ fileId, file }) => { - try { - const { fileId: uploadFileId } = await uploadFile2DB({ - file, - bucketName: BucketNameEnum.dataset, - data: { - datasetId - }, - percentListen: (e) => { - setSelectFiles((state) => - state.map((item) => - item.id === fileId - ? { - ...item, - uploadedFileRate: item.uploadedFileRate - ? Math.max(e, item.uploadedFileRate) - : e - } - : item - ) - ); - } - }); - setSelectFiles((state) => - state.map((item) => - item.id === fileId - ? { - ...item, - dbFileId: uploadFileId, - isUploading: false, - uploadedFileRate: 100 - } - : item - ) - ); - } catch (error) { - setSelectFiles((state) => - state.map((item) => - item.id === fileId - ? { - ...item, - isUploading: false, - errorMsg: getErrText(error) - } - : item - ) - ); - } - }) - ); - } - }, - { - onBefore([files]) { - onStartSelect(); - setSelectFiles((state) => { - const formatFiles = files.map((selectFile) => { - const { fileId, file } = selectFile; - - return { - id: fileId, - createStatus: 'waiting', - file, - sourceName: file.name, - sourceSize: formatFileSize(file.size), - icon: getFileIcon(file.name), - isUploading: true, - uploadedFileRate: 0 - }; - }); - const results = formatFiles.concat(state).slice(0, maxCount); - return results; - }); - }, - onFinally() { - onFinishSelect(); - } - } - ); - const selectFileCallback = useCallback( (files: SelectFileItemType[]) => { if (selectFiles.length + files.length > maxCount) { @@ -160,7 +64,7 @@ const FileSelector = ({ } // size check if (!maxSize) { - return onSelectFile(files); + return onSelectFiles(files); } const filterFiles = files.filter((item) => item.file.size <= maxSize); @@ -171,9 +75,9 @@ const FileSelector = ({ }); } - return onSelectFile(filterFiles); + return onSelectFiles(filterFiles); }, - [t, maxCount, maxSize, onSelectFile, selectFiles.length, toast] + [t, maxCount, maxSize, onSelectFiles, selectFiles.length, toast] ); const handleDragEnter = (e: DragEvent) => { @@ -278,7 +182,6 @@ const FileSelector = ({ return ( { - {t('common:core.dataset.collection.Collection name')} + {t('dataset:collection_name')} { {...register('name', { required: true })} - placeholder={t('common:core.dataset.collection.Collection name')} + placeholder={t('dataset:collection_name')} bg={'myGray.50'} /> diff --git a/projects/app/src/pageComponents/dataset/detail/Import/diffSource/FileLocal.tsx b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/FileLocal.tsx index 3428a119a..18c60f0c0 100644 --- a/projects/app/src/pageComponents/dataset/detail/Import/diffSource/FileLocal.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/FileLocal.tsx @@ -1,14 +1,20 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'; import { type ImportSourceItemType } from '@/web/core/dataset/type.d'; import { Box, Button } from '@chakra-ui/react'; -import FileSelector from '../components/FileSelector'; +import FileSelector, { type SelectFileItemType } from '../components/FileSelector'; import { useTranslation } from 'next-i18next'; import dynamic from 'next/dynamic'; -import Loading from '@fastgpt/web/components/common/MyLoading'; import { RenderUploadFiles } from '../components/RenderFiles'; import { useContextSelector } from 'use-context-selector'; import { DatasetImportContext 
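FileSelector above now only validates the selection and defers uploading to its caller via `onSelectFiles`. A sketch of the count and size guards it applies, with `toastError` standing in for the project's toast hook and the limits mirroring the `feConfigs` defaults; whether an over-count selection is truncated or rejected outright is an assumption:

type SelectFileItem = { fileId: string; file: File };

function guardSelection(
  files: SelectFileItem[],
  alreadySelected: number,
  maxCount: number, // feConfigs?.uploadFileMaxAmount || 1000
  maxSize: number, // (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024
  toastError: (msg: string) => void
): SelectFileItem[] {
  if (alreadySelected + files.length > maxCount) {
    toastError(`Over the ${maxCount}-file limit`);
    // Truncation is an assumption; the component may reject the batch instead.
    files = files.slice(0, Math.max(0, maxCount - alreadySelected));
  }
  const fitted = files.filter((item) => item.file.size <= maxSize);
  if (fitted.length < files.length) {
    toastError('Some files exceeded the size limit and were skipped');
  }
  return fitted;
}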
} from '../Context'; +import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; +import { uploadFile2DB } from '@/web/common/file/controller'; +import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; +import { getErrText } from '@fastgpt/global/common/error/utils'; +import { formatFileSize } from '@fastgpt/global/common/file/tools'; +import { getFileIcon } from '@fastgpt/global/common/file/icon'; +import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; const DataProcess = dynamic(() => import('../commonProgress/DataProcess')); const PreviewData = dynamic(() => import('../commonProgress/PreviewData')); @@ -33,14 +39,16 @@ export default React.memo(FileLocal); const SelectFile = React.memo(function SelectFile() { const { t } = useTranslation(); + const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v); + const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId); + const [selectFiles, setSelectFiles] = useState( sources.map((source) => ({ isUploading: false, ...source })) ); - const [uploading, setUploading] = useState(false); const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]); useEffect(() => { @@ -53,15 +61,90 @@ const SelectFile = React.memo(function SelectFile() { goToNext(); }, [goToNext]); + const { runAsync: onSelectFiles, loading: uploading } = useRequest2( + async (files: SelectFileItemType[]) => { + { + await Promise.all( + files.map(async ({ fileId, file }) => { + try { + const { fileId: uploadFileId } = await uploadFile2DB({ + file, + bucketName: BucketNameEnum.dataset, + data: { + datasetId + }, + percentListen: (e) => { + setSelectFiles((state) => + state.map((item) => + item.id === fileId + ? { + ...item, + uploadedFileRate: item.uploadedFileRate + ? Math.max(e, item.uploadedFileRate) + : e + } + : item + ) + ); + } + }); + setSelectFiles((state) => + state.map((item) => + item.id === fileId + ? { + ...item, + dbFileId: uploadFileId, + isUploading: false, + uploadedFileRate: 100 + } + : item + ) + ); + } catch (error) { + setSelectFiles((state) => + state.map((item) => + item.id === fileId + ? 
{ + ...item, + isUploading: false, + errorMsg: getErrText(error) + } + : item + ) + ); + } + }) + ); + } + }, + { + onBefore([files]) { + setSelectFiles((state) => { + return [ + ...state, + ...files.map((selectFile) => { + const { fileId, file } = selectFile; + + return { + id: fileId, + createStatus: 'waiting', + file, + sourceName: file.name, + sourceSize: formatFileSize(file.size), + icon: getFileIcon(file.name), + isUploading: true, + uploadedFileRate: 0 + }; + }) + ]; + }); + } + } + ); + return ( - setUploading(true)} - onFinishSelect={() => setUploading(false)} - /> + {/* render files */} diff --git a/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ImageDataset.tsx b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ImageDataset.tsx new file mode 100644 index 000000000..20d1f82fd --- /dev/null +++ b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ImageDataset.tsx @@ -0,0 +1,189 @@ +import React, { useState } from 'react'; +import { Box, Button, Flex, Input, Image } from '@chakra-ui/react'; +import { useTranslation } from 'next-i18next'; +import { useRouter } from 'next/router'; +import { TabEnum } from '../../NavBar'; +import { createImageDatasetCollection } from '@/web/core/dataset/image/api'; +import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel'; +import { useForm } from 'react-hook-form'; +import FileSelector, { type SelectFileItemType } from '../components/FileSelector'; +import type { ImportSourceItemType } from '@/web/core/dataset/type'; +import { getNanoid } from '@fastgpt/global/common/string/tools'; +import MyIcon from '@fastgpt/web/components/common/Icon'; +import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; +import { useContextSelector } from 'use-context-selector'; +import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; +import { DatasetImportContext } from '../Context'; +import MyImage from '@fastgpt/web/components/common/Image/MyImage'; + +const fileType = '.jpg, .jpeg, .png'; + +const ImageDataset = () => { + return ; +}; + +export default React.memo(ImageDataset); + +const SelectFile = React.memo(function SelectFile() { + const { t } = useTranslation(); + const router = useRouter(); + + const parentId = useContextSelector(DatasetImportContext, (v) => v.parentId); + const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId); + + const [selectFiles, setSelectFiles] = useState([]); + const [uploadProgress, setUploadProgress] = useState(0); + + const { register, handleSubmit } = useForm({ + defaultValues: { + name: '' + } + }); + + const onSelectFiles = (files: SelectFileItemType[]) => { + setSelectFiles((pre) => { + const formatFiles = Array.from(files).map((item) => { + const previewUrl = URL.createObjectURL(item.file); + + return { + id: getNanoid(), + createStatus: 'waiting', + file: item.file, + sourceName: item.file.name, + icon: previewUrl + }; + }); + + return [...pre, ...formatFiles]; + }); + }; + const onRemoveFile = (index: number) => { + setSelectFiles((prev) => { + return prev.filter((_, i) => i !== index); + }); + }; + + const { runAsync: onCreate, loading: creating } = useRequest2( + async ({ name: collectionName }: { name: string }) => { + return await createImageDatasetCollection({ + parentId, + datasetId, + collectionName, + files: selectFiles.map((item) => item.file!).filter(Boolean), + onUploadProgress: setUploadProgress + }); + }, + { + manual: true, + successToast: t('common:create_success'), + onSuccess() { + router.replace({ 
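The upload logic moved into FileLocal above keeps per-file progress by patching only the matching entry in the selected-files state. A small sketch of that pattern, with `UploadItem` as a trimmed stand-in for `ImportSourceItemType`:

type UploadItem = {
  id: string;
  isUploading: boolean;
  uploadedFileRate?: number;
  errorMsg?: string;
};

type Setter = (updater: (prev: UploadItem[]) => UploadItem[]) => void;

// Patch one file's entry immutably; all other entries are untouched.
function patchItem(setState: Setter, fileId: string, patch: Partial<UploadItem>) {
  setState((prev) => prev.map((item) => (item.id === fileId ? { ...item, ...patch } : item)));
}

// During upload: patchItem(setSelectFiles, fileId, { uploadedFileRate: 42 });
// On success:    patchItem(setSelectFiles, fileId, { isUploading: false, uploadedFileRate: 100 });
// On failure:    patchItem(setSelectFiles, fileId, { isUploading: false, errorMsg: 'upload failed' });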
+ query: { + datasetId: router.query.datasetId, + currentTab: TabEnum.collectionCard + } + }); + } + } + ); + + return ( + + + + {t('dataset:collection_name')} + + + + + + + + {t('common:core.dataset.collection.Collection raw text')} + + + + + + + {selectFiles.length > 0 && ( + + {selectFiles.map((file, index) => ( + + + onRemoveFile(index)} + className="close-icon" + display={['', 'none']} + zIndex={10} + /> + + ))} + + )} + + + + + + + + ); +}); diff --git a/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ReTraining.tsx b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ReTraining.tsx index 08771300c..1d3a69682 100644 --- a/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ReTraining.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Import/diffSource/ReTraining.tsx @@ -37,7 +37,7 @@ const ReTraining = () => { apiFileId: collection.apiFileId, createStatus: 'waiting', - icon: getCollectionIcon(collection.type, collection.name), + icon: getCollectionIcon({ type: collection.type, name: collection.name }), id: collection._id, isUploading: false, sourceName: collection.name, diff --git a/projects/app/src/pageComponents/dataset/detail/Import/index.tsx b/projects/app/src/pageComponents/dataset/detail/Import/index.tsx index f9d253ff3..345a360e8 100644 --- a/projects/app/src/pageComponents/dataset/detail/Import/index.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Import/index.tsx @@ -11,6 +11,7 @@ const FileCustomText = dynamic(() => import('./diffSource/FileCustomText')); const ExternalFileCollection = dynamic(() => import('./diffSource/ExternalFile')); const APIDatasetCollection = dynamic(() => import('./diffSource/APIDataset')); const ReTraining = dynamic(() => import('./diffSource/ReTraining')); +const ImageDataset = dynamic(() => import('./diffSource/ImageDataset')); const ImportDataset = () => { const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource); @@ -22,6 +23,8 @@ const ImportDataset = () => { if (importSource === ImportDataSourceEnum.fileCustom) return FileCustomText; if (importSource === ImportDataSourceEnum.externalFile) return ExternalFileCollection; if (importSource === ImportDataSourceEnum.apiDataset) return APIDatasetCollection; + if (importSource === ImportDataSourceEnum.imageDataset) return ImageDataset; + return null; }, [importSource]); return ImportComponent ? 
( diff --git a/projects/app/src/pageComponents/dataset/detail/Info/components/EditApiServiceModal.tsx b/projects/app/src/pageComponents/dataset/detail/Info/components/EditApiServiceModal.tsx index d49b84f44..016528ce0 100644 --- a/projects/app/src/pageComponents/dataset/detail/Info/components/EditApiServiceModal.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Info/components/EditApiServiceModal.tsx @@ -5,23 +5,17 @@ import { useTranslation } from 'next-i18next'; import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; import { useForm } from 'react-hook-form'; import { useToast } from '@fastgpt/web/hooks/useToast'; -import { - type APIFileServer, - type FeishuServer, - type YuqueServer -} from '@fastgpt/global/core/dataset/apiDataset'; import ApiDatasetForm from '@/pageComponents/dataset/ApiDatasetForm'; import { useContextSelector } from 'use-context-selector'; import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; -import { datasetTypeCourseMap } from '@/web/core/dataset/constants'; import { getDocPath } from '@/web/common/system/doc'; import MyIcon from '@fastgpt/web/components/common/Icon'; +import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type'; +import { DatasetTypeMap } from '@fastgpt/global/core/dataset/constants'; export type EditAPIDatasetInfoFormType = { id: string; - apiServer?: APIFileServer; - yuqueServer?: YuqueServer; - feishuServer?: FeishuServer; + apiDatasetServer?: ApiDatasetServerType; }; const EditAPIDatasetInfoModal = ({ @@ -60,7 +54,7 @@ const EditAPIDatasetInfoModal = ({ return ( - {datasetTypeCourseMap[type] && ( + {DatasetTypeMap[type]?.courseUrl && ( {t('dataset:apidataset_configuration')} @@ -71,7 +65,7 @@ const EditAPIDatasetInfoModal = ({ color={'primary.600'} fontSize={'sm'} cursor={'pointer'} - onClick={() => window.open(getDocPath(datasetTypeCourseMap[type]), '_blank')} + onClick={() => window.open(getDocPath(DatasetTypeMap[type].courseUrl!), '_blank')} > {t('common:Instructions')} diff --git a/projects/app/src/pageComponents/dataset/detail/Info/index.tsx b/projects/app/src/pageComponents/dataset/detail/Info/index.tsx index 9c2037256..4f46ff37f 100644 --- a/projects/app/src/pageComponents/dataset/detail/Info/index.tsx +++ b/projects/app/src/pageComponents/dataset/detail/Info/index.tsx @@ -311,12 +311,12 @@ const Info = ({ datasetId }: { datasetId: string }) => { onClick={() => setEditedAPIDataset({ id: datasetDetail._id, - apiServer: datasetDetail.apiServer + apiDatasetServer: datasetDetail.apiDatasetServer }) } /> - {datasetDetail.apiServer?.baseUrl} + {datasetDetail.apiDatasetServer?.apiServer?.baseUrl} )} @@ -336,12 +336,12 @@ const Info = ({ datasetId }: { datasetId: string }) => { onClick={() => setEditedAPIDataset({ id: datasetDetail._id, - yuqueServer: datasetDetail.yuqueServer + apiDatasetServer: datasetDetail.apiDatasetServer }) } /> - {datasetDetail.yuqueServer?.userId} + {datasetDetail.apiDatasetServer?.yuqueServer?.userId} )} @@ -361,12 +361,14 @@ const Info = ({ datasetId }: { datasetId: string }) => { onClick={() => setEditedAPIDataset({ id: datasetDetail._id, - feishuServer: datasetDetail.feishuServer + apiDatasetServer: datasetDetail.apiDatasetServer }) } /> - {datasetDetail.feishuServer?.folderToken} + + {datasetDetail.apiDatasetServer?.feishuServer?.folderToken} + )} @@ -435,9 +437,7 @@ const Info = ({ datasetId }: { datasetId: string }) => { onEdit={(data) => updateDataset({ id: datasetId, - apiServer: data.apiServer, - yuqueServer: data.yuqueServer, - 
feishuServer: data.feishuServer + apiDatasetServer: data.apiDatasetServer }) } /> diff --git a/projects/app/src/pageComponents/dataset/detail/InputDataModal.tsx b/projects/app/src/pageComponents/dataset/detail/InputDataModal.tsx index b6bf568f4..567a06d67 100644 --- a/projects/app/src/pageComponents/dataset/detail/InputDataModal.tsx +++ b/projects/app/src/pageComponents/dataset/detail/InputDataModal.tsx @@ -1,37 +1,39 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'; -import { Box, Flex, Button, Textarea, ModalFooter, HStack, VStack } from '@chakra-ui/react'; -import { type UseFormRegister, useFieldArray, useForm } from 'react-hook-form'; +import { Box, Flex, Button, Textarea, ModalFooter, HStack, VStack, Image } from '@chakra-ui/react'; +import type { UseFormRegister } from 'react-hook-form'; +import { useFieldArray, useForm } from 'react-hook-form'; import { postInsertData2Dataset, putDatasetDataById, getDatasetCollectionById, getDatasetDataItemById } from '@/web/core/dataset/api'; -import { useToast } from '@fastgpt/web/hooks/useToast'; import MyIcon from '@fastgpt/web/components/common/Icon'; import MyModal from '@fastgpt/web/components/common/MyModal'; import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; import { useTranslation } from 'next-i18next'; import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; -import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils'; -import { type DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type'; +import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils'; +import type { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type'; import DeleteIcon from '@fastgpt/web/components/common/Icon/delete'; import { defaultCollectionDetail } from '@/web/core/dataset/constants'; import MyBox from '@fastgpt/web/components/common/MyBox'; -import { getErrText } from '@fastgpt/global/common/error/utils'; import { useSystemStore } from '@/web/common/system/useSystemStore'; import styles from './styles.module.scss'; import { DatasetDataIndexTypeEnum, getDatasetIndexMapData } from '@fastgpt/global/core/dataset/data/constants'; +import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants'; import FillRowTabs from '@fastgpt/web/components/common/Tabs/FillRowTabs'; import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel'; import MyIconButton from '@fastgpt/web/components/common/Icon/button'; +import MyImage from '@/components/MyImage/index'; export type InputDataType = { q: string; a: string; + imagePreivewUrl?: string; indexes: (Omit & { dataId?: string; // pg data id fold: boolean; @@ -40,7 +42,8 @@ export type InputDataType = { enum TabEnum { chunk = 'chunk', - qa = 'qa' + qa = 'qa', + image = 'image' } const InputDataModal = ({ @@ -52,17 +55,16 @@ const InputDataModal = ({ }: { collectionId: string; dataId?: string; - defaultValue?: { q: string; a?: string }; + defaultValue?: { q?: string; a?: string; imagePreivewUrl?: string }; onClose: () => void; onSuccess: (data: InputDataType & { dataId: string }) => void; }) => { const { t } = useTranslation(); - const { toast } = useToast(); const { embeddingModelList, defaultModels } = useSystemStore(); - const [currentTab, setCurrentTab] = useState(TabEnum.chunk); + const [currentTab, setCurrentTab] = useState(); - const { register, handleSubmit, reset, control } = useForm(); + const { register, handleSubmit, reset, control, watch } = useForm(); const { fields: indexes, prepend: 
     prepend: prependIndexes,
@@ -72,16 +74,24 @@ const InputDataModal = ({
     control,
     name: 'indexes'
   });
+  const imagePreivewUrl = watch('imagePreivewUrl');
 
   const { data: collection = defaultCollectionDetail } = useRequest2(
-    () => {
-      return getDatasetCollectionById(collectionId);
-    },
+    () => getDatasetCollectionById(collectionId),
     {
       manual: false,
-      refreshDeps: [collectionId]
+      refreshDeps: [collectionId],
+      onSuccess(res) {
+        if (res.type === DatasetCollectionTypeEnum.images) {
+          setCurrentTab(TabEnum.image);
+        } else {
+          setCurrentTab(TabEnum.chunk);
+        }
+      }
     }
   );
+
+  // Get data
   const { loading: isFetchingData } = useRequest2(
     async () => {
       if (dataId) return getDatasetDataItemById(dataId);
@@ -93,8 +103,9 @@
       onSuccess(res) {
         if (res) {
           reset({
-            q: res.q,
-            a: res.a,
+            q: res.q || '',
+            a: res.a || '',
+            imagePreivewUrl: res.imagePreivewUrl,
             indexes: res.indexes.map((item) => ({
               ...item,
               fold: true
@@ -102,54 +113,32 @@
           });
         } else if (defaultValue) {
           reset({
-            q: defaultValue.q,
-            a: defaultValue.a
+            q: defaultValue.q || '',
+            a: defaultValue.a || '',
+            imagePreivewUrl: defaultValue.imagePreivewUrl
           });
         }
-
-        if (res?.a || defaultValue?.a) {
-          setCurrentTab(TabEnum.qa);
-        }
       },
       onError(err) {
-        toast({
-          status: 'error',
-          title: t(getErrText(err) as any)
-        });
        onClose();
      }
    }
  );
 
-  const maxToken = useMemo(() => {
-    const vectorModel =
-      embeddingModelList.find((item) => item.model === collection.dataset.vectorModel) ||
-      defaultModels.embedding;
-
-    return vectorModel?.maxToken || 3000;
-  }, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
-
-  // import new data
+  // Import new data
   const { runAsync: sureImportData, loading: isImporting } = useRequest2(
     async (e: InputDataType) => {
-      if (!e.q) {
-        return Promise.reject(t('common:dataset.data.input is empty'));
-      }
-
-      const totalLength = e.q.length + (e.a?.length || 0);
-      if (totalLength >= maxToken * 1.4) {
-        return Promise.reject(t('common:core.dataset.data.Too Long'));
-      }
-
       const data = { ...e };
-      const dataId = await postInsertData2Dataset({
+      const postData: any = {
         collectionId: collection._id,
         q: e.q,
         a: currentTab === TabEnum.qa ? e.a : '',
         // Contains no default index
-        indexes: e.indexes?.filter((item) => !!item.text?.trim())
-      });
+        indexes: e.indexes.filter((item) => !!item.text?.trim())
+      };
+
+      const dataId = await postInsertData2Dataset(postData);
 
       return {
         ...data,
@@ -166,23 +155,26 @@
         a: '',
         indexes: []
       });
+
       onSuccess(e);
     },
-      errorToast: t('common:error.unKnow')
+      errorToast: t('dataset:common.error.unKnow')
     }
   );
 
-  // update
+  // Update data
  const { runAsync: onUpdateData, loading: isUpdating } = useRequest2(
    async (e: InputDataType) => {
      if (!dataId) return Promise.reject(t('common:error.unKnow'));
 
-      await putDatasetDataById({
+      const updateData: any = {
        dataId,
        q: e.q,
        a: currentTab === TabEnum.qa ? e.a : '',
        indexes: e.indexes.filter((item) => !!item.text?.trim())
-      });
+      };
+
+      await putDatasetDataById(updateData);
 
       return {
         dataId,
@@ -202,10 +194,18 @@
   const isLoading = isFetchingData;
 
   const icon = useMemo(
-    () => getSourceNameIcon({ sourceName: collection.sourceName, sourceId: collection.sourceId }),
+    () => getCollectionIcon({ type: collection.type, name: collection.sourceName }),
     [collection]
   );
 
+  const maxToken = useMemo(() => {
+    const vectorModel =
+      embeddingModelList.find((item) => item.model === collection.dataset.vectorModel) ||
+      defaultModels.embedding;
+
+    return vectorModel?.maxToken || 2000;
+  }, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
+
   return (
         {/* Tab */}
-          {
-            setCurrentTab(e);
-          }}
-        />
+        {(currentTab === TabEnum.chunk || currentTab === TabEnum.qa) && (
+            {
+              setCurrentTab(e);
+            }}
+          />
+        )}
@@ -268,45 +270,64 @@ const InputDataModal = ({
           w={['100%', 0]}
           overflow={['unset', 'auto']}
         >
-
-            {currentTab === TabEnum.chunk ? t('common:dataset_data_input_chunk_content') : t('common:dataset_data_input_q')}
-