diff --git a/apps/common/handle/impl/text_split_handle.py b/apps/common/handle/impl/text_split_handle.py index d40220638..56e36f93b 100644 --- a/apps/common/handle/impl/text_split_handle.py +++ b/apps/common/handle/impl/text_split_handle.py @@ -29,7 +29,8 @@ class TextSplitHandle(BaseSplitHandle): if file_name.endswith(".md") or file_name.endswith('.txt'): return True result = detect(buffer) - if result['encoding'] != 'ascii' and result['confidence'] > 0.5: + if result['encoding'] is not None and result['confidence'] is not None and result['encoding'] != 'ascii' and \ + result['confidence'] > 0.5: return True return False diff --git a/ui/src/utils/utils.ts b/ui/src/utils/utils.ts index 68c01a00b..370b7efbc 100644 --- a/ui/src/utils/utils.ts +++ b/ui/src/utils/utils.ts @@ -31,18 +31,21 @@ export const randomId = function () { */ export function fileType(name: string) { const suffix = name.split('.') - - return suffix[suffix.length - 1] === 'docx' ? 'doc' : suffix[suffix.length - 1] + return suffix[suffix.length - 1] } /* 获得文件对应图片 */ export function getImgUrl(name: string) { - const typeList = ['txt', 'pdf', 'doc', 'csv', 'md'] - const type = typeList.includes(fileType(name)) ? fileType(name) : 'unknow' + const type = isRightType(name) ? fileType(name) : 'unknow' return new URL(`../assets/${type}-icon.svg`, import.meta.url).href } +// 是否是白名单后缀 +export function isRightType(name: string) { + const typeList = ['txt', 'pdf', 'docx', 'csv', 'md'] + return typeList.includes(fileType(name)) +} /* 从指定数组中过滤出对应的对象 diff --git a/ui/src/views/dataset/component/UploadComponent.vue b/ui/src/views/dataset/component/UploadComponent.vue index d83731cdd..488c3c97d 100644 --- a/ui/src/views/dataset/component/UploadComponent.vue +++ b/ui/src/views/dataset/component/UploadComponent.vue @@ -16,7 +16,7 @@ action="#" :auto-upload="false" :show-file-list="false" - accept=".txt, .md, .csv, .log, .doc, .docx, .pdf" + accept=".txt, .md, .csv, .log, .docx, .pdf" :limit="50" :on-exceed="onExceed" :on-change="filehandleChange" @@ -29,7 +29,7 @@
- 支持格式:TXT、Markdown、PDF、DOC、DOCX,每次最多上传50个文件,每个文件不超过 100MB + 支持格式:TXT、Markdown、PDF、DOCX,每次最多上传50个文件,每个文件不超过 100MB
若使用【高级分段】建议上传前规范文件的分段标识