diff --git a/apps/application/template/embed.js b/apps/application/template/embed.js index d9ae3f6cd..eff570e40 100644 --- a/apps/application/template/embed.js +++ b/apps/application/template/embed.js @@ -19,7 +19,7 @@ const guideHtml=` ` const chatButtonHtml= -`
+`
@@ -164,7 +164,7 @@ function initMaxkbStyle(root){ } #maxkb .maxkb-mask .maxkb-content { width: 45px; - height: 50px; + height: 48px; box-shadow: 1px 1px 1px 2000px rgba(0,0,0,.6); border-radius: 50% 0 0 50%; position: absolute; diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py index 4bb70896c..d1f538936 100644 --- a/apps/common/handle/impl/doc_split_handle.py +++ b/apps/common/handle/impl/doc_split_handle.py @@ -11,14 +11,18 @@ import re from typing import List from docx import Document +from docx.table import Table +from docx.text.paragraph import Paragraph from common.handle.base_split_handle import BaseSplitHandle from common.util.split_model import SplitModel -default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), re.compile('(?') for cell in rows[0].cells]) + ' |\n' + md_table += '| ' + ' | '.join(['---' for i in range(len(rows[0].cells))]) + ' |\n' + for row in rows[1:]: + md_table += '| ' + ' | '.join([cell.text.replace("\n", '
') for cell in row.cells]) + ' |\n' + return md_table + def to_md(self, doc): - ps = doc.paragraphs - return "\n".join([self.paragraph_to_md(para) for para in ps]) + elements = [] + for element in doc.element.body: + if element.tag.endswith('tbl'): + # 处理表格 + table = Table(element, doc) + elements.append(table) + elif element.tag.endswith('p'): + # 处理段落 + paragraph = Paragraph(element, doc) + elements.append(paragraph) + + return "\n".join( + [self.paragraph_to_md(element) if isinstance(element, Paragraph) else self.table_to_md(element) for element + in elements]) def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer): try: diff --git a/apps/common/handle/impl/pdf_split_handle.py b/apps/common/handle/impl/pdf_split_handle.py index c839a10aa..b52d341b3 100644 --- a/apps/common/handle/impl/pdf_split_handle.py +++ b/apps/common/handle/impl/pdf_split_handle.py @@ -14,10 +14,13 @@ import fitz from common.handle.base_split_handle import BaseSplitHandle from common.util.split_model import SplitModel -default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'), re.compile('(? 0: - level_content_list = [*level_content_list, *list( - map(lambda row: to_tree_obj(row, 'block'), - post_handler_paragraph(other_content, with_filter=self.with_filter, limit=self.limit)))] + children = self.parse_to_tree(text=other_content, + index=index) + if len(children) > 0: + level_content_list = [*level_content_list, *children] + else: + if len(other_content.strip()) > 0: + level_content_list = [*level_content_list, *list( + map(lambda row: to_tree_obj(row, 'block'), + post_handler_paragraph(other_content, with_filter=self.with_filter, limit=self.limit)))] else: if len(text.strip()) > 0: level_content_list = [*level_content_list, *list( @@ -330,15 +335,16 @@ class SplitModel: :param text: 文本数据 :return: 解析后数据 {content:段落数据,keywords:[‘段落关键词’],parent_chain:['段落父级链路']} """ - result_tree = self.parse_to_tree(text.replace('\r', '\n'), 0) + text = text.replace('\r', '\n') + result_tree = self.parse_to_tree(text, 0) result = result_tree_to_paragraph(result_tree, [], []) - # 过滤段落内容不为空字符串的数据 - result = [item for item in result if 'content' in item and len(item.get('content').strip()) > 0] - return [self.post_reset_paragraph(item) for item in result] + return [item for item in [self.post_reset_paragraph(row) for row in result] if + 'content' in item and len(item.get('content').strip()) > 0] def post_reset_paragraph(self, paragraph: Dict): result = self.filter_title_special_characters(paragraph) result = self.sub_title(result) + result = self.content_is_null(result) return result @staticmethod @@ -349,6 +355,15 @@ class SplitModel: return {**paragraph, 'title': title[0:255], 'content': title[255:len(title)] + paragraph.get('content')} return paragraph + @staticmethod + def content_is_null(paragraph: Dict): + if 'title' in paragraph: + title = paragraph.get('title') + content = paragraph.get('content') + if (content is None or len(content.strip()) == 0) and (title is not None and len(title) > 0): + return {'title': '', 'content': title} + return paragraph + @staticmethod def filter_title_special_characters(paragraph: Dict): title = paragraph.get('title') if 'title' in paragraph else '' @@ -361,9 +376,12 @@ class SplitModel: title_special_characters_list = ['#', '\n', '\r', '\\s'] default_split_pattern = { - 'md': [re.compile('(?<=^)# .*|(?<=\\n)# .*'), re.compile('(? 0 else True)] @@ -122,7 +123,7 @@ class ProblemSerializers(ApiMixin, serializers.Serializer): self.is_valid(raise_exception=True) problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(dataset_id=self.data.get("dataset_id"), problem_id=self.data.get("problem_id")) - if problem_paragraph_mapping is None or len(problem_paragraph_mapping)==0: + if problem_paragraph_mapping is None or len(problem_paragraph_mapping) == 0: return [] return native_search( QuerySet(Paragraph).filter(id__in=[row.paragraph_id for row in problem_paragraph_mapping]), diff --git a/ui/src/assets/csv-icon.svg b/ui/src/assets/csv-icon.svg new file mode 100644 index 000000000..85147ccb4 --- /dev/null +++ b/ui/src/assets/csv-icon.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/ui/src/assets/doc-icon.svg b/ui/src/assets/doc-icon.svg new file mode 100644 index 000000000..899a00861 --- /dev/null +++ b/ui/src/assets/doc-icon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/ui/src/assets/pdf-icon.svg b/ui/src/assets/pdf-icon.svg new file mode 100644 index 000000000..17a4be004 --- /dev/null +++ b/ui/src/assets/pdf-icon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/ui/src/assets/unknow-icon.svg b/ui/src/assets/unknow-icon.svg new file mode 100644 index 000000000..20270ac52 --- /dev/null +++ b/ui/src/assets/unknow-icon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/ui/src/components/app-table/index.vue b/ui/src/components/app-table/index.vue index 29335ba0a..eac075a76 100644 --- a/ui/src/components/app-table/index.vue +++ b/ui/src/components/app-table/index.vue @@ -9,8 +9,9 @@ :placeholder="`请输入${quickCreateName}`" class="w-500 mr-12" autofocus - :maxlength="quickCreateMaxlength" + :maxlength="quickCreateMaxlength || '-'" :show-word-limit="quickCreateMaxlength ? true : false" + @keydown.enter="submitHandle" /> 创建 @@ -45,6 +46,9 @@ import { ref, nextTick, watch, computed, onMounted } from 'vue' import { MsgError } from '@/utils/message' defineOptions({ name: 'AppTable' }) +import useStore from '@/stores' +const { common } = useStore() + const props = defineProps({ paginationConfig: { type: Object, @@ -65,7 +69,8 @@ const props = defineProps({ quickCreateMaxlength: { type: Number, default: () => 0 - } + }, + storeKey: String }) const emit = defineEmits(['changePage', 'sizeChange', 'creatQuick']) @@ -108,9 +113,15 @@ function quickCreateHandel() { function handleSizeChange() { emit('sizeChange') + if (props.storeKey) { + common.savePage(props.storeKey, props.paginationConfig) + } } function handleCurrentChange() { emit('changePage') + if (props.storeKey) { + common.savePage(props.storeKey, props.paginationConfig) + } } defineExpose({}) diff --git a/ui/src/components/read-write/index.vue b/ui/src/components/read-write/index.vue index 6a04e4633..5cc99a404 100644 --- a/ui/src/components/read-write/index.vue +++ b/ui/src/components/read-write/index.vue @@ -19,7 +19,7 @@ v-model="writeValue" placeholder="请输入" autofocus - :maxlength="maxlength" + :maxlength="maxlength || '-'" :show-word-limit="maxlength ? true : false" >
@@ -75,7 +75,7 @@ function submit() { loading.value = false }, 200) } -function editNameHandle(row: any) { +function editNameHandle() { writeValue.value = props.data isEdit.value = true } diff --git a/ui/src/stores/modules/common.ts b/ui/src/stores/modules/common.ts index 993eba347..606d574f0 100644 --- a/ui/src/stores/modules/common.ts +++ b/ui/src/stores/modules/common.ts @@ -1,13 +1,28 @@ import { defineStore } from 'pinia' +export interface commonTypes { + breadcrumb: any + paginationConfig: any | null + search: any +} + const useCommonStore = defineStore({ id: 'common', - state: () => ({ - breadcrumb: null + state: (): commonTypes => ({ + breadcrumb: null, + // 搜索和分页缓存 + paginationConfig: {}, + search: {} }), actions: { saveBreadcrumb(data: any) { this.breadcrumb = data + }, + savePage(val: string, data: any) { + this.paginationConfig[val] = data + }, + saveCondition(val: string, data: any) { + this.search[val] = data } } }) diff --git a/ui/src/utils/utils.ts b/ui/src/utils/utils.ts index 3a8ed2387..68c01a00b 100644 --- a/ui/src/utils/utils.ts +++ b/ui/src/utils/utils.ts @@ -31,14 +31,16 @@ export const randomId = function () { */ export function fileType(name: string) { const suffix = name.split('.') - return suffix[suffix.length - 1] + + return suffix[suffix.length - 1] === 'docx' ? 'doc' : suffix[suffix.length - 1] } /* 获得文件对应图片 */ export function getImgUrl(name: string) { - const type = fileType(name) || 'txt' + const typeList = ['txt', 'pdf', 'doc', 'csv', 'md'] + const type = typeList.includes(fileType(name)) ? fileType(name) : 'unknow' return new URL(`../assets/${type}-icon.svg`, import.meta.url).href } diff --git a/ui/src/views/dataset/CreateDataset.vue b/ui/src/views/dataset/CreateDataset.vue index beacabfd0..7a55bd963 100644 --- a/ui/src/views/dataset/CreateDataset.vue +++ b/ui/src/views/dataset/CreateDataset.vue @@ -113,18 +113,18 @@ function clearStore() { } function submit() { loading.value = true - const data = [] as any + const documents = [] as any StepSecondRef.value?.paragraphList.map((item: any) => { - data.push({ + documents.push({ name: item.name, paragraphs: item.content }) }) - const obj = { ...baseInfo.value, data } as datasetData + const obj = { ...baseInfo.value, documents } as datasetData if (id) { // 上传文档 document - .asyncPostDocument(id as string, data) + .asyncPostDocument(id as string, documents) .then(() => { MsgSuccess('提交成功') clearStore() diff --git a/ui/src/views/dataset/component/UploadComponent.vue b/ui/src/views/dataset/component/UploadComponent.vue index ff4d95611..d83731cdd 100644 --- a/ui/src/views/dataset/component/UploadComponent.vue +++ b/ui/src/views/dataset/component/UploadComponent.vue @@ -29,7 +29,7 @@

- 支持格式:TXT、Markdown、PDF、DOC、DOCX,每次最多上传50个文件,每个文件不超过 10MB + 支持格式:TXT、Markdown、PDF、DOC、DOCX,每次最多上传50个文件,每个文件不超过 100MB

若使用【高级分段】建议上传前规范文件的分段标识

@@ -43,7 +43,7 @@
- +

{{ item && item?.name }}

{{ filesize(item && item?.size) || '0K' }} diff --git a/ui/src/views/dataset/step/ResultSuccess.vue b/ui/src/views/dataset/step/ResultSuccess.vue index 32b58f554..6d571cab9 100644 --- a/ui/src/views/dataset/step/ResultSuccess.vue +++ b/ui/src/views/dataset/step/ResultSuccess.vue @@ -30,7 +30,7 @@ >
- +

{{ item && item?.name }}

{{ filesize(item && item?.char_length) }} diff --git a/ui/src/views/document/index.vue b/ui/src/views/document/index.vue index d0611a5f5..027cc4875 100644 --- a/ui/src/views/document/index.vue +++ b/ui/src/views/document/index.vue @@ -48,6 +48,7 @@ @selection-change="handleSelectionChange" v-loading="loading" :row-key="(row: any) => row.id" + :storeKey="storeKey" > @@ -156,8 +157,8 @@