feat: 知识库增加重新向量化功能

This commit is contained in:
shaohuzhang1 2024-05-24 11:27:59 +08:00 committed by GitHub
parent 9ac9c9b64a
commit a3af104ef0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 169 additions and 53 deletions

View File

@ -123,6 +123,8 @@ class ListenerManagement:
:return: None
"""
max_kb.info(f"开始--->向量化文档:{document_id}")
QuerySet(Document).filter(id=document_id).update(**{'status': Status.embedding})
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': Status.embedding})
status = Status.success
try:
data_list = native_search(

View File

@ -680,6 +680,11 @@ class DataSetSerializers(serializers.ModelSerializer):
ListenerManagement.delete_embedding_by_dataset_signal.send(self.data.get('id'))
return True
def re_embedding(self, with_valid=True):
    """
    Send this dataset's id on ``ListenerManagement.embedding_by_dataset_signal``.

    :param with_valid: when truthy, run ``is_valid(raise_exception=True)`` before sending
    :return: None
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    dataset_id = self.data.get('id')
    # NOTE(review): the listener connected to this signal is presumably what performs the
    # actual re-embedding; it is not visible from here - confirm.
    ListenerManagement.embedding_by_dataset_signal.send(dataset_id)
def list_application(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)

View File

@ -448,18 +448,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
if with_valid:
self.is_valid(raise_exception=True)
document_id = self.data.get("document_id")
document = QuerySet(Document).filter(id=document_id).first()
if document.type == Type.web:
# 异步同步
work_thread_pool.submit(lambda x: DocumentSerializers.Sync(data={'document_id': document_id}).sync(),
{})
else:
if document.status != Status.embedding.value:
document.status = Status.embedding
document.save()
ListenerManagement.embedding_by_document_signal.send(document_id)
return True
ListenerManagement.embedding_by_document_signal.send(document_id)
@transaction.atomic
def delete(self):

View File

@ -8,6 +8,7 @@ urlpatterns = [
path('dataset/web', views.Dataset.CreateWebDataset.as_view()),
path('dataset/qa', views.Dataset.CreateQADataset.as_view()),
path('dataset/<str:dataset_id>', views.Dataset.Operate.as_view(), name="dataset_key"),
path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"),
path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()),
path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"),
path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()),
@ -26,6 +27,7 @@ urlpatterns = [
path('dataset/document/split_pattern', views.Document.SplitPattern.as_view(),
name="document_operate"),
path('dataset/<str:dataset_id>/document/migrate/<str:target_dataset_id>', views.Document.Migrate.as_view()),
path('dataset/<str:dataset_id>/document/<str:document_id>/sync', views.Document.SyncWeb.as_view()),
path('dataset/<str:dataset_id>/document/<str:document_id>/refresh', views.Document.Refresh.as_view()),
path('dataset/<str:dataset_id>/document/<str:document_id>/paragraph', views.Paragraph.as_view()),
path(

View File

@ -137,6 +137,21 @@ class Dataset(APIView):
'search_mode': request.query_params.get('search_mode')}).hit_test(
))
class Embedding(APIView):
    # Routed from 'dataset/<str:dataset_id>/re_embedding'; requires token authentication.
    # Notes are kept as '#' comments (not docstrings) so generated API descriptions stay unchanged.
    authentication_classes = [TokenAuth]

    # `methods` must be a list of HTTP verbs: DRF's `action` lower-cases each element of the
    # iterable, so a bare "PUT" string would be recorded as ['p', 'u', 't'].
    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary="重新向量化", operation_id="重新向量化",
                         manual_parameters=DataSetSerializers.Operate.get_request_params_api(),
                         responses=result.get_default_response(),
                         tags=["知识库"]
                         )
    @has_permissions(lambda r, keywords: Permission(group=Group.DATASET, operate=Operate.MANAGE,
                                                    dynamic_tag=keywords.get('dataset_id')))
    def put(self, request: Request, dataset_id: str):
        # Validation happens inside re_embedding() (with_valid defaults to True); its return
        # value is wrapped in the default success response.
        return result.success(
            DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding())
class Operate(APIView):
authentication_classes = [TokenAuth]

View File

@ -168,6 +168,24 @@ class Document(APIView):
def delete(self, request: Request, dataset_id: str):
return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_delete(request.data))
class SyncWeb(APIView):
    # Routed from 'dataset/<str:dataset_id>/document/<str:document_id>/sync'.
    authentication_classes = [TokenAuth]

    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary="同步web站点类型",
                         operation_id="同步web站点类型",
                         manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
                         responses=result.get_default_response(),
                         tags=["知识库/文档"]
                         )
    @has_permissions(
        lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE,
                                dynamic_tag=k.get('dataset_id')))
    def put(self, request: Request, dataset_id: str, document_id: str):
        # Both path parameters are handed to the Sync serializer; whatever sync() returns
        # becomes the payload of the success response.
        sync_request = {'document_id': document_id, 'dataset_id': dataset_id}
        serializer = DocumentSerializers.Sync(data=sync_request)
        return result.success(serializer.sync())
class Refresh(APIView):
authentication_classes = [TokenAuth]

View File

@ -176,6 +176,17 @@ const putSyncWebDataset: (
return put(`${prefix}/${dataset_id}/sync_web`, undefined, { sync_type }, loading)
}
/**
 * Request re-embedding of a dataset.
 *
 * Issues `put` against `${prefix}/${dataset_id}/re_embedding`; the second and third
 * arguments of `put` are left undefined.
 * @param dataset_id id of the dataset, interpolated into the request path
 * @param loading optional ref forwarded unchanged as the last argument of `put`
 * @returns the promise produced by `put`
 */
const putReEmbeddingDataset: (
  dataset_id: string,
  loading?: Ref<boolean>
) => Promise<Result<any>> = (dataset_id, loading) =>
  put(`${prefix}/${dataset_id}/re_embedding`, undefined, undefined, loading)
export default {
getDataset,
getAllDataset,
@ -186,6 +197,9 @@ export default {
listUsableApplication,
getDatasetHitTest,
postWebDataset,
putSyncWebDataset,
putReEmbeddingDataset,
postQADataset,
putSyncWebDataset
}

View File

@ -137,12 +137,8 @@ const getDocumentDetail: (dataset_id: string, document_id: string) => Promise<Re
/**
*
* @param
* dataset_id, document_id,
* {
"name": "string",
"is_active": true
}
* @param
* dataset_id, document_id,
*/
const putDocumentRefresh: (
dataset_id: string,
@ -157,6 +153,19 @@ const putDocumentRefresh: (
)
}
/**
 * Request a sync of a single document (web-site type).
 *
 * Issues `put` against `${prefix}/${dataset_id}/document/${document_id}/sync`; the second
 * and third arguments of `put` are left undefined.
 * @param dataset_id id of the dataset, interpolated into the request path
 * @param document_id id of the document, interpolated into the request path
 * @param loading optional ref forwarded unchanged as the last argument of `put`
 * @returns the promise produced by `put`
 */
const putDocumentSync: (
  dataset_id: string,
  document_id: string,
  loading?: Ref<boolean>
) => Promise<Result<any>> = (dataset_id, document_id, loading) =>
  put(`${prefix}/${dataset_id}/document/${document_id}/sync`, undefined, undefined, loading)
/**
*
* @param dataset_id,
@ -258,6 +267,7 @@ export default {
getDocumentDetail,
listSplitPattern,
putDocumentRefresh,
putDocumentSync,
delMulSyncDocument,
postWebDocument,
putMigrateMulDocument,

View File

@ -873,5 +873,42 @@ export const iconMap: any = {
)
])
}
},
'app-document-refresh': {
  iconReader: () => {
    // `d` attributes of the five <path> children, in their original order.
    // Every path is filled with `currentColor`.
    const pathData = [
      'M494.592 165.12l-320 208a32 32 0 0 0-14.592 26.88v224a32 32 0 0 0 14.592 26.88l320 208a32 32 0 0 0 34.88 0l320-208a32 32 0 0 0 14.528-26.88v-224a32 32 0 0 0-14.528-26.88l-320-208a32 32 0 0 0-34.88 0zM224 417.408L512 230.144l288 187.2V606.72L512 793.856 224 606.656V417.28z',
      'M512 592a32 32 0 0 0-32 32V832a32 32 0 0 0 64 0V624a32 32 0 0 0-32-32z',
      'M165.76 381.632a32 32 0 0 0 7.872 44.608l320 224a32 32 0 0 0 36.736 0l320-224a32 32 0 0 0-36.736-52.48L512 584.96l-301.632-211.2a32 32 0 0 0-44.608 7.872z',
      'M493.632 373.76a32 32 0 0 1 36.736 0l320 224a32 32 0 0 1-36.736 52.48L512 439.04l-301.632 211.2a32 32 0 1 1-36.736-52.48l320-224z',
      'M512 160a32 32 0 0 0-32 32v208a32 32 0 0 0 64 0V192a32 32 0 0 0-32-32z'
    ]
    const svgAttrs = {
      style: { height: '100%', width: '100%' },
      viewBox: '0 0 1024 1024',
      version: '1.1',
      xmlns: 'http://www.w3.org/2000/svg'
    }
    const paths = pathData.map((d) => h('path', { d, fill: 'currentColor' }))
    // Same tree as before: <i><svg ...><path/> x5</svg></i>
    return h('i', [h('svg', svgAttrs, paths)])
  }
}
}

View File

@ -1,5 +1,9 @@
<template>
<MdEditor noIconfont v-bind="$attrs" />
<MdEditor noIconfont v-bind="$attrs">
<template #defFooters>
<slot name="defFooters"> </slot>
</template>
</MdEditor>
</template>
<script setup lang="ts">

View File

@ -75,6 +75,13 @@
v-if="item.type === '1'"
>同步</el-dropdown-item
>
<el-dropdown-item @click="reEmbeddingDataset(item)">
<AppIcon
iconName="app-document-refresh"
style="font-size: 16px"
></AppIcon>
重新向量化</el-dropdown-item
>
<el-dropdown-item
icon="Setting"
@click.stop="router.push({ path: `/dataset/${item.id}/setting` })"
@ -118,10 +125,14 @@ const paginationConfig = reactive({
const searchValue = ref('')
function refresh(row: any) {
function refresh() {
MsgSuccess('同步任务发送成功')
}
/**
 * Ask the backend to re-embed the dataset of the given list item.
 * @param row list item; only `row.id` is read
 */
function reEmbeddingDataset(row: any) {
  const datasetId = row.id
  // The response is intentionally ignored: nothing happens on success.
  datasetApi.putReEmbeddingDataset(datasetId).then(() => {})
}
/**
 * Open the dialog held in `SyncWebDialogRef` for the given list item.
 * @param row list item; only `row.id` is read
 */
function syncDataset(row: any) {
  const dialog = SyncWebDialogRef.value
  dialog.open(row.id)
}

View File

@ -146,7 +146,7 @@
<span class="mr-4">
<el-tooltip effect="dark" content="重新向量化" placement="top">
<el-button type="primary" text @click.stop="refreshDocument(row)">
<el-icon><RefreshRight /></el-icon>
<AppIcon iconName="app-document-refresh" style="font-size: 16px"></AppIcon>
</el-button>
</el-tooltip>
</span>
@ -174,13 +174,8 @@
</span>
</div>
<div v-if="datasetDetail.type === '1'">
<el-tooltip
effect="dark"
content="同步"
placement="top"
v-if="datasetDetail.type === '1'"
>
<el-button type="primary" text @click.stop="refreshDocument(row)">
<el-tooltip effect="dark" content="同步" placement="top">
<el-button type="primary" text @click.stop="syncDocument(row)">
<el-icon><Refresh /></el-icon>
</el-button>
</el-tooltip>
@ -191,6 +186,13 @@
</el-button>
<template #dropdown>
<el-dropdown-menu>
<el-dropdown-item @click="refreshDocument(row)">
<AppIcon
iconName="app-document-refresh"
style="font-size: 16px"
></AppIcon>
重新向量化</el-dropdown-item
>
<el-dropdown-item icon="Setting" @click="settingDoc(row)"
>设置</el-dropdown-item
>
@ -340,33 +342,33 @@ const closeInterval = () => {
clearInterval(interval)
}
}
function refreshDocument(row: any) {
if (row.type === '1') {
if (row.meta?.source_url) {
MsgConfirm(`确认同步文档?`, `同步将删除已有数据重新获取新数据,请谨慎操作。`, {
confirmButtonText: '同步',
confirmButtonClass: 'danger'
})
.then(() => {
documentApi.putDocumentRefresh(row.dataset_id, row.id).then(() => {
getList()
})
})
.catch(() => {})
} else {
MsgConfirm(`提示`, `无法同步,请先去设置文档 URL地址`, {
confirmButtonText: '确认',
type: 'warning'
})
.then(() => {})
.catch(() => {})
}
} else {
documentApi.putDocumentRefresh(row.dataset_id, row.id).then(() => {
getList()
/**
 * Sync a document after the user confirms.
 *
 * When the row has no `meta.source_url`, only a warning dialog is shown and no request
 * is sent. Otherwise a confirmation dialog is shown; on confirm the sync request is sent
 * and the list is reloaded once it resolves. Dismissing either dialog is ignored.
 * @param row table row; reads `meta.source_url`, `dataset_id` and `id`
 */
function syncDocument(row: any) {
  // Guard: nothing to sync from without a source URL.
  if (!row.meta?.source_url) {
    MsgConfirm(`提示`, `无法同步,请先去设置文档 URL地址`, {
      confirmButtonText: '确认',
      type: 'warning'
    })
      .then(() => {})
      .catch(() => {})
    return
  }

  // Runs only after the user confirms; the request promise is deliberately not returned,
  // so the outer `.catch` below covers the dialog rejection only.
  const startSync = () => {
    documentApi.putDocumentSync(row.dataset_id, row.id).then(() => {
      getList()
    })
  }

  MsgConfirm(`确认同步文档?`, `同步将删除已有数据重新获取新数据,请谨慎操作。`, {
    confirmButtonText: '同步',
    confirmButtonClass: 'danger'
  })
    .then(startSync)
    .catch(() => {})
}
/**
 * Send the document refresh request for the given row and reload the list once it resolves.
 * @param row table row; reads `dataset_id` and `id`
 */
function refreshDocument(row: any) {
  const request = documentApi.putDocumentRefresh(row.dataset_id, row.id)
  request.then(() => {
    getList()
  })
}
function rowClickHandle(row: any, column: any) {
if (column && column.type === 'selection') {

View File

@ -21,7 +21,12 @@
:toolbars="toolbars"
style="height: 300px"
@onUploadImg="onUploadImg"
/>
:footers="footers"
>
<template #defFooters>
<span style="margin-left: -6px;">/ 4096</span>
</template>
</MarkdownEditor>
<MdPreview
v-else
ref="editorRef"
@ -76,6 +81,8 @@ const toolbars = [
'htmlPreview'
] as any[]
const footers = ['markdownTotal', 0, '=', 1, 'scrollSwitch']
const editorRef = ref()
const form = ref<any>({