diff --git a/packages/global/core/workflow/runtime/type.d.ts b/packages/global/core/workflow/runtime/type.d.ts index 0900795dd..f961f989b 100644 --- a/packages/global/core/workflow/runtime/type.d.ts +++ b/packages/global/core/workflow/runtime/type.d.ts @@ -97,6 +97,7 @@ export type ModuleDispatchProps = ChatDispatchProps & { export type SystemVariablesType = { userId: string; + username: string; appId: string; chatId?: string; responseChatItemId?: string; diff --git a/packages/service/core/dataset/search/controller.ts b/packages/service/core/dataset/search/controller.ts index 71428b4aa..4f1db0795 100644 --- a/packages/service/core/dataset/search/controller.ts +++ b/packages/service/core/dataset/search/controller.ts @@ -302,12 +302,61 @@ export async function searchDatasetData( let tagCollectionIdList: string[] | undefined = undefined; let createTimeCollectionIdList: string[] | undefined = undefined; + let collectionIdList: string[] | undefined = undefined; try { - const jsonMatch = - typeof collectionFilterMatch === 'object' - ? collectionFilterMatch - : json5.parse(collectionFilterMatch); + const jsonMatch = (() => { + if (typeof collectionFilterMatch === 'object') return collectionFilterMatch; + + try { + return json5.parse(collectionFilterMatch); + } catch { + // 针对传入的非标准格式 {"collectionId":[xxx]} 进行处理 + const raw = String(collectionFilterMatch); + + const listMatch = raw.match(/collectionId\s*:\s*\[([^\]]+)\]/i); + if (listMatch) { + const ids = listMatch[1] + .split(',') + .map((id) => id.trim()) + .filter(Boolean) + .map((id) => id.replace(/^['"]|['"]$/g, '')); + + return { collectionId: ids }; + } + + const singleMatch = raw.match(/collectionId\s*:\s*([0-9a-fA-F]{24})/i); + if (singleMatch) { + return { collectionId: [singleMatch[1]] }; + } + + return; + } + })(); + + if (!jsonMatch) return; + + // 获取前端传入的 collectionId,并判断是否有效 + const formatCollectionIds = (() => { + const collectionIdConfig = jsonMatch?.collectionId; + if (collectionIdConfig === undefined || collectionIdConfig === null) return undefined; + if (typeof collectionIdConfig === 'string') { + if (collectionIdConfig.trim() === '') return undefined; + return [collectionIdConfig]; + } + if (Array.isArray(collectionIdConfig)) { + if (collectionIdConfig.length === 0) return undefined; + return collectionIdConfig; + } + if (typeof collectionIdConfig === 'object') { + if ('$in' in collectionIdConfig && Array.isArray(collectionIdConfig.$in)) { + if (collectionIdConfig.$in.length === 0) return undefined; + return collectionIdConfig.$in; + } + return []; + } + return []; + })(); const andTags = jsonMatch?.tags?.$and as (string | null)[] | undefined; const orTags = jsonMatch?.tags?.$or as (string | null)[] | undefined; @@ -408,6 +457,36 @@ export async function searchDatasetData( tagCollectionIdList = collections.map((item) => String(item._id)); } + // collectionId与用户传入的datasetId取交集 + if (formatCollectionIds) { + const isValidObjectId = (value: unknown): value is string => + typeof value === 'string' && Types.ObjectId.isValid(value); + + const validCollectionIds = (formatCollectionIds as unknown[]) + .filter(isValidObjectId) + .map((id) => String(id)); + + if (validCollectionIds.length === 0) return []; + + const collections = await MongoDatasetCollection.find( + { + teamId, + datasetId: { $in: datasetIds }, + _id: { $in: validCollectionIds } + }, + '_id type', + { + ...readFromSecondary + } + ).lean(); + + if (collections.length === 0) return []; + + collectionIdList = await getAllCollectionIds({ + parentCollectionIds: collections.map((item) => String(item._id)) + }); + } + // time const getCreateTime = jsonMatch?.createTime?.$gte as string | undefined; const lteCreateTime = jsonMatch?.createTime?.$lte as string | undefined; @@ -429,7 +508,7 @@ export async function searchDatasetData( } // Concat tag and time - const collectionIds = (() => { + const tagOrTimeCollectionIds = (() => { if (tagCollectionIdList && createTimeCollectionIdList) { return tagCollectionIdList.filter((id) => (createTimeCollectionIdList as string[]).includes(id) @@ -439,9 +518,22 @@ export async function searchDatasetData( return tagCollectionIdList || createTimeCollectionIdList; })(); - return await getAllCollectionIds({ - parentCollectionIds: collectionIds + const tagTimeResult = await getAllCollectionIds({ + parentCollectionIds: tagOrTimeCollectionIds }); + + if (!tagOrTimeCollectionIds && collectionIdList === undefined) return; + + if (tagOrTimeCollectionIds && (tagTimeResult?.length || 0) === 0) return []; + + if (collectionIdList === undefined) return tagTimeResult; + + if (collectionIdList && collectionIdList.length === 0) return []; + + if (!tagTimeResult) return collectionIdList; + + const collectionIdSet = new Set(collectionIdList); + return tagTimeResult.filter((id) => collectionIdSet.has(id)); } catch (error) {} }; const embeddingRecall = async ({ @@ -770,6 +862,15 @@ export async function searchDatasetData( filterCollectionByMetadata() ]); + //元数据过滤为空结果时,直接跳过知识库检索 + if (filterCollectionIdList && filterCollectionIdList.length === 0) { + return { + tokens: 0, + embeddingRecallResults: [], + fullTextRecallResults: [] + }; + } + const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([ embeddingRecall({ queries, diff --git a/packages/service/core/workflow/dispatch/index.ts b/packages/service/core/workflow/dispatch/index.ts index b6be94f1c..5bf38a017 100644 --- a/packages/service/core/workflow/dispatch/index.ts +++ b/packages/service/core/workflow/dispatch/index.ts @@ -1140,6 +1140,7 @@ export const runWorkflow = async (data: RunWorkflowProps): Promise): SystemVariablesType => { return { userId: variables.userId, + username: variables.username, appId: variables.appId, chatId: variables.chatId, responseChatItemId: variables.responseChatItemId, diff --git a/packages/web/i18n/en/workflow.json b/packages/web/i18n/en/workflow.json index 57c9fe33a..26f78e199 100644 --- a/packages/web/i18n/en/workflow.json +++ b/packages/web/i18n/en/workflow.json @@ -68,7 +68,7 @@ "field_name_already_exists": "Field name already exists", "field_required": "Required", "field_used_as_tool_input": "Used as Tool Call Parameter", - "filter_description": "Currently supports filtering by tags and creation time. Fill in the format as follows:\n{\n \"tags\": {\n \"$and\": [\"Tag 1\",\"Tag 2\"],\n \"$or\": [\"When there are $and tags, and is effective, or is not effective\"]\n },\n \"createTime\": {\n \"$gte\": \"YYYY-MM-DD HH:mm format, collection creation time greater than this time\",\n \"$lte\": \"YYYY-MM-DD HH:mm format, collection creation time less than this time, can be used with $gte\"\n }\n}", + "filter_description": "Supports filtering by tags, createTime and collectionId. Use the format below:\n{\n \"tags\": {\n \"$and\": [\"Tag 1\",\"Tag 2\"],\n \"$or\": [\"When $and exists, $and is effective and $or is ignored\"]\n },\n \"createTime\": {\n \"$gte\": \"YYYY-MM-DD HH:mm, collection created after this time\",\n \"$lte\": \"YYYY-MM-DD HH:mm, collection created before this time; can be used with $gte\"\n },\n \"collectionId\": [\"64d1...abc\", \"64d1...def\"]\n // You may also pass a single string, or { \"$in\": [\"id1\",\"id2\"] }. Omit/empty array means no collection filter.\n}", "find_tip": "Find node ctrl f", "find_tip_mac": "Find node ⌘ f", "foldAll": "Collapse all", @@ -213,6 +213,7 @@ "update_link_error": "Error updating link", "update_specified_node_output_or_global_variable": "Can update the output value of a specified node or update global variables", "use_user_id": "User ID", + "user_info": "User info", "user_form_input_config": "Form configuration", "user_form_input_description": "describe", "user_form_input_name": "Name", diff --git a/packages/web/i18n/zh-CN/workflow.json b/packages/web/i18n/zh-CN/workflow.json index 972239c84..c19e6eb65 100644 --- a/packages/web/i18n/zh-CN/workflow.json +++ b/packages/web/i18n/zh-CN/workflow.json @@ -68,7 +68,7 @@ "field_name_already_exists": "字段名已经存在", "field_required": "必填", "field_used_as_tool_input": "作为工具调用参数", - "filter_description": "目前支持标签和创建时间过滤,需按照以下格式填写:\n{\n \"tags\": {\n \"$and\": [\"标签 1\",\"标签 2\"],\n \"$or\": [\"有 $and 标签时,and 生效,or 不生效\"]\n },\n \"createTime\": {\n \"$gte\": \"YYYY-MM-DD HH:mm 格式即可,集合的创建时间大于该时间\",\n \"$lte\": \"YYYY-MM-DD HH:mm 格式即可,集合的创建时间小于该时间,可和 $gte 共同使用\"\n }\n}", + "filter_description": "目前支持标签、创建时间和文件ID过滤,按如下格式填写:\n{\n \"tags\": {\n \"$and\": [\"标签 1\",\"标签 2\"],\n \"$or\": [\"有 $and 标签时,and 生效,or 不生效\"]\n },\n \"createTime\": {\n \"$gte\": \"YYYY-MM-DD HH:mm,集合创建时间大于该时间\",\n \"$lte\": \"YYYY-MM-DD HH:mm,集合创建时间小于该时间,可与 $gte 同用\"\n },\n \"collectionId\": [\"64d1...abc\", \"64d1...def\"]\n // 也可传单个字符串,或 { \"$in\": [\"id1\",\"id2\"] },不传/空数组表示不过滤\n}", "find_tip": "查找节点 ctrl f", "find_tip_mac": "查找节点 ⌘ f", "foldAll": "全部折叠", @@ -213,6 +213,7 @@ "update_link_error": "更新链接异常", "update_specified_node_output_or_global_variable": "可以更新指定节点的输出值或更新全局变量", "use_user_id": "使用者 ID", + "user_info": "用户信息", "user_form_input_config": "表单配置", "user_form_input_description": "描述", "user_form_input_name": "标题", diff --git a/packages/web/i18n/zh-Hant/workflow.json b/packages/web/i18n/zh-Hant/workflow.json index ba60fee3a..2fb7e3b61 100644 --- a/packages/web/i18n/zh-Hant/workflow.json +++ b/packages/web/i18n/zh-Hant/workflow.json @@ -68,7 +68,7 @@ "field_name_already_exists": "欄位名稱已存在", "field_required": "必填", "field_used_as_tool_input": "作為工具呼叫參數", - "filter_description": "目前支援標籤和建立時間篩選,需按照以下格式填寫:\n{\n \"tags\": {\n \"$and\": [\"標籤 1\",\"標籤 2\"],\n \"$or\": [\"當有 $and 標籤時,$and 才會生效,$or 不會生效\"]\n },\n \"createTime\": {\n \"$gte\": \"YYYY-MM-DD HH:mm 格式,資料集的建立時間大於這個時間\",\n \"$lte\": \"YYYY-MM-DD HH:mm 格式,資料集的建立時間小於這個時間,可以和 $gte 一起使用\"\n }\n}", + "filter_description": "支援標籤、建立時間與文件 ID 篩選,按以下格式填寫:\n{\n \"tags\": {\n \"$and\": [\"標籤 1\",\"標籤 2\"],\n \"$or\": [\"有 $and 標籤時,and 生效,or 不生效\"]\n },\n \"createTime\": {\n \"$gte\": \"YYYY-MM-DD HH:mm,集合建立時間大於此時間\",\n \"$lte\": \"YYYY-MM-DD HH:mm,集合建立時間小於此時間,可與 $gte 同用\"\n },\n \"collectionId\": [\"64d1...abc\", \"64d1...def\"]\n // 也可傳單個字串,或 { \"$in\": [\"id1\",\"id2\"] },不傳/空陣列表示不過濾\n}", "find_tip": "查找節點 ctrl f", "find_tip_mac": "查找節點 ⌘ f", "foldAll": "全部折疊", @@ -213,6 +213,7 @@ "update_link_error": "更新連結發生錯誤", "update_specified_node_output_or_global_variable": "可以更新指定節點的輸出值或更新全域變數", "use_user_id": "使用者 ID", + "user_info": "使用者資訊", "user_form_input_config": "表單設定", "user_form_input_description": "描述", "user_form_input_name": "標題", diff --git a/projects/app/src/web/core/app/utils.ts b/projects/app/src/web/core/app/utils.ts index 1785e1379..295ddf9cd 100644 --- a/projects/app/src/web/core/app/utils.ts +++ b/projects/app/src/web/core/app/utils.ts @@ -588,6 +588,12 @@ export const workflowSystemVariables: EditorVariablePickerType[] = [ required: true, valueType: WorkflowIOValueTypeEnum.string }, + { + key: 'username', + label: i18nT('workflow:user_info'), + required: true, + valueType: WorkflowIOValueTypeEnum.string + }, { key: 'appId', label: i18nT('common:core.module.http.AppId'),