mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
feat: 知识库增加重新向量化功能
This commit is contained in:
parent
9ac9c9b64a
commit
a3af104ef0
|
|
@ -123,6 +123,8 @@ class ListenerManagement:
|
|||
:return: None
|
||||
"""
|
||||
max_kb.info(f"开始--->向量化文档:{document_id}")
|
||||
QuerySet(Document).filter(id=document_id).update(**{'status': Status.embedding})
|
||||
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': Status.embedding})
|
||||
status = Status.success
|
||||
try:
|
||||
data_list = native_search(
|
||||
|
|
|
|||
|
|
@ -680,6 +680,11 @@ class DataSetSerializers(serializers.ModelSerializer):
|
|||
ListenerManagement.delete_embedding_by_dataset_signal.send(self.data.get('id'))
|
||||
return True
|
||||
|
||||
def re_embedding(self, with_valid=True):
    """
    Ask for the dataset held by this serializer to be vectorized again.

    :param with_valid: when truthy, ``is_valid(raise_exception=True)`` is called first
    :return: None
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    dataset_id = self.data.get('id')
    # Send the dataset id on the embedding-by-dataset signal.
    ListenerManagement.embedding_by_dataset_signal.send(dataset_id)
|
||||
|
||||
def list_application(self, with_valid=True):
|
||||
if with_valid:
|
||||
self.is_valid(raise_exception=True)
|
||||
|
|
|
|||
|
|
@ -448,18 +448,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
|
|||
if with_valid:
|
||||
self.is_valid(raise_exception=True)
|
||||
document_id = self.data.get("document_id")
|
||||
document = QuerySet(Document).filter(id=document_id).first()
|
||||
if document.type == Type.web:
|
||||
# 异步同步
|
||||
work_thread_pool.submit(lambda x: DocumentSerializers.Sync(data={'document_id': document_id}).sync(),
|
||||
{})
|
||||
|
||||
else:
|
||||
if document.status != Status.embedding.value:
|
||||
document.status = Status.embedding
|
||||
document.save()
|
||||
ListenerManagement.embedding_by_document_signal.send(document_id)
|
||||
return True
|
||||
ListenerManagement.embedding_by_document_signal.send(document_id)
|
||||
|
||||
@transaction.atomic
|
||||
def delete(self):
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ urlpatterns = [
|
|||
path('dataset/web', views.Dataset.CreateWebDataset.as_view()),
|
||||
path('dataset/qa', views.Dataset.CreateQADataset.as_view()),
|
||||
path('dataset/<str:dataset_id>', views.Dataset.Operate.as_view(), name="dataset_key"),
|
||||
path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"),
|
||||
path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()),
|
||||
path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"),
|
||||
path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()),
|
||||
|
|
@ -26,6 +27,7 @@ urlpatterns = [
|
|||
path('dataset/document/split_pattern', views.Document.SplitPattern.as_view(),
|
||||
name="document_operate"),
|
||||
path('dataset/<str:dataset_id>/document/migrate/<str:target_dataset_id>', views.Document.Migrate.as_view()),
|
||||
path('dataset/<str:dataset_id>/document/<str:document_id>/sync', views.Document.SyncWeb.as_view()),
|
||||
path('dataset/<str:dataset_id>/document/<str:document_id>/refresh', views.Document.Refresh.as_view()),
|
||||
path('dataset/<str:dataset_id>/document/<str:document_id>/paragraph', views.Paragraph.as_view()),
|
||||
path(
|
||||
|
|
|
|||
|
|
@ -137,6 +137,21 @@ class Dataset(APIView):
|
|||
'search_mode': request.query_params.get('search_mode')}).hit_test(
|
||||
))
|
||||
|
||||
class Embedding(APIView):
    """API view whose PUT handler calls ``DataSetSerializers.Operate(...).re_embedding()``."""
    authentication_classes = [TokenAuth]

    # `methods` must be a list of HTTP method names: DRF's `action` decorator
    # iterates over it, so a bare "PUT" string would be split into characters.
    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary="重新向量化", operation_id="重新向量化",
                         manual_parameters=DataSetSerializers.Operate.get_request_params_api(),
                         responses=result.get_default_response(),
                         tags=["知识库"]
                         )
    @has_permissions(lambda r, keywords: Permission(group=Group.DATASET, operate=Operate.MANAGE,
                                                    dynamic_tag=keywords.get('dataset_id')))
    def put(self, request: Request, dataset_id: str):
        """
        Trigger re-vectorization of one dataset.

        :param request: incoming request; ``request.user.id`` is passed to the serializer as ``user_id``
        :param dataset_id: dataset id taken from the URL, passed to the serializer as ``id``
        :return: ``result.success(...)`` wrapping the value returned by ``re_embedding()``
        """
        return result.success(
            DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding())
|
||||
|
||||
class Operate(APIView):
|
||||
authentication_classes = [TokenAuth]
|
||||
|
||||
|
|
|
|||
|
|
@ -168,6 +168,24 @@ class Document(APIView):
|
|||
def delete(self, request: Request, dataset_id: str):
|
||||
return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_delete(request.data))
|
||||
|
||||
class SyncWeb(APIView):
    """API view whose PUT handler runs ``DocumentSerializers.Sync(...).sync()`` for one document."""
    authentication_classes = [TokenAuth]

    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(
        operation_summary="同步web站点类型",
        operation_id="同步web站点类型",
        manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
        responses=result.get_default_response(),
        tags=["知识库/文档"]
    )
    @has_permissions(
        lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE, dynamic_tag=k.get('dataset_id')))
    def put(self, request: Request, dataset_id: str, document_id: str):
        """
        :param request: incoming request (not read by this handler)
        :param dataset_id: dataset id from the URL
        :param document_id: document id from the URL
        :return: ``result.success(...)`` wrapping the value returned by ``sync()``
        """
        sync_payload = {'document_id': document_id, 'dataset_id': dataset_id}
        serializer = DocumentSerializers.Sync(data=sync_payload)
        return result.success(serializer.sync())
|
||||
|
||||
class Refresh(APIView):
|
||||
authentication_classes = [TokenAuth]
|
||||
|
||||
|
|
|
|||
|
|
@ -176,6 +176,17 @@ const putSyncWebDataset: (
|
|||
return put(`${prefix}/${dataset_id}/sync_web`, undefined, { sync_type }, loading)
|
||||
}
|
||||
|
||||
/**
 * Re-vectorize a dataset.
 *
 * Sends a PUT to `${prefix}/${dataset_id}/re_embedding`, passing `undefined`
 * for the second and third arguments of `put`.
 * @param dataset_id id of the dataset
 * @param loading optional ref forwarded to `put` as its last argument
 */
const putReEmbeddingDataset = (
  dataset_id: string,
  loading?: Ref<boolean>
): Promise<Result<any>> => {
  const url = `${prefix}/${dataset_id}/re_embedding`
  return put(url, undefined, undefined, loading)
}
|
||||
|
||||
export default {
|
||||
getDataset,
|
||||
getAllDataset,
|
||||
|
|
@ -186,6 +197,9 @@ export default {
|
|||
listUsableApplication,
|
||||
getDatasetHitTest,
|
||||
postWebDataset,
|
||||
putSyncWebDataset,
|
||||
putReEmbeddingDataset,
|
||||
postQADataset,
|
||||
putSyncWebDataset
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -137,12 +137,8 @@ const getDocumentDetail: (dataset_id: string, document_id: string) => Promise<Re
|
|||
|
||||
/**
|
||||
* 刷新文档向量库
|
||||
* @param 参数
|
||||
* dataset_id, document_id,
|
||||
* {
|
||||
"name": "string",
|
||||
"is_active": true
|
||||
}
|
||||
* @param 参数
|
||||
* dataset_id, document_id,
|
||||
*/
|
||||
const putDocumentRefresh: (
|
||||
dataset_id: string,
|
||||
|
|
@ -157,6 +153,19 @@ const putDocumentRefresh: (
|
|||
)
|
||||
}
|
||||
|
||||
/**
 * Sync a web-site type document.
 *
 * Sends a PUT to `${prefix}/${dataset_id}/document/${document_id}/sync`,
 * passing `undefined` for the second and third arguments of `put`.
 * @param dataset_id id of the dataset that owns the document
 * @param document_id id of the document
 * @param loading optional ref forwarded to `put` as its last argument
 */
const putDocumentSync = (
  dataset_id: string,
  document_id: string,
  loading?: Ref<boolean>
): Promise<Result<any>> => {
  const url = `${prefix}/${dataset_id}/document/${document_id}/sync`
  return put(url, undefined, undefined, loading)
}
|
||||
|
||||
/**
|
||||
* 批量同步文档
|
||||
* @param 参数 dataset_id,
|
||||
|
|
@ -258,6 +267,7 @@ export default {
|
|||
getDocumentDetail,
|
||||
listSplitPattern,
|
||||
putDocumentRefresh,
|
||||
putDocumentSync,
|
||||
delMulSyncDocument,
|
||||
postWebDocument,
|
||||
putMigrateMulDocument,
|
||||
|
|
|
|||
|
|
@ -873,5 +873,42 @@ export const iconMap: any = {
|
|||
)
|
||||
])
|
||||
}
|
||||
},
|
||||
'app-document-refresh': {
  // Builds an <i> element wrapping an inline SVG (viewBox 0 0 1024 1024)
  // made of five <path> children, each filled with `currentColor`.
  iconReader: () => {
    const pathData = [
      'M494.592 165.12l-320 208a32 32 0 0 0-14.592 26.88v224a32 32 0 0 0 14.592 26.88l320 208a32 32 0 0 0 34.88 0l320-208a32 32 0 0 0 14.528-26.88v-224a32 32 0 0 0-14.528-26.88l-320-208a32 32 0 0 0-34.88 0zM224 417.408L512 230.144l288 187.2V606.72L512 793.856 224 606.656V417.28z',
      'M512 592a32 32 0 0 0-32 32V832a32 32 0 0 0 64 0V624a32 32 0 0 0-32-32z',
      'M165.76 381.632a32 32 0 0 0 7.872 44.608l320 224a32 32 0 0 0 36.736 0l320-224a32 32 0 0 0-36.736-52.48L512 584.96l-301.632-211.2a32 32 0 0 0-44.608 7.872z',
      'M493.632 373.76a32 32 0 0 1 36.736 0l320 224a32 32 0 0 1-36.736 52.48L512 439.04l-301.632 211.2a32 32 0 1 1-36.736-52.48l320-224z',
      'M512 160a32 32 0 0 0-32 32v208a32 32 0 0 0 64 0V192a32 32 0 0 0-32-32z'
    ]
    const svgAttrs = {
      style: { height: '100%', width: '100%' },
      viewBox: '0 0 1024 1024',
      version: '1.1',
      xmlns: 'http://www.w3.org/2000/svg'
    }
    const pathNodes = pathData.map((d) => h('path', { d, fill: 'currentColor' }))
    return h('i', [h('svg', svgAttrs, pathNodes)])
  }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
<template>
|
||||
<MdEditor noIconfont v-bind="$attrs" />
|
||||
<MdEditor noIconfont v-bind="$attrs">
|
||||
<template #defFooters>
|
||||
<slot name="defFooters"> </slot>
|
||||
</template>
|
||||
</MdEditor>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
|
|
|
|||
|
|
@ -75,6 +75,13 @@
|
|||
v-if="item.type === '1'"
|
||||
>同步</el-dropdown-item
|
||||
>
|
||||
<el-dropdown-item @click="reEmbeddingDataset(item)">
|
||||
<AppIcon
|
||||
iconName="app-document-refresh"
|
||||
style="font-size: 16px"
|
||||
></AppIcon>
|
||||
重新向量化</el-dropdown-item
|
||||
>
|
||||
<el-dropdown-item
|
||||
icon="Setting"
|
||||
@click.stop="router.push({ path: `/dataset/${item.id}/setting` })"
|
||||
|
|
@ -118,10 +125,14 @@ const paginationConfig = reactive({
|
|||
|
||||
const searchValue = ref('')
|
||||
|
||||
function refresh(row: any) {
|
||||
function refresh() {
|
||||
MsgSuccess('同步任务发送成功')
|
||||
}
|
||||
|
||||
/**
 * Request re-vectorization of the dataset shown in the given row.
 * The resolved value of the API call is ignored.
 * @param row dataset row; only `row.id` is read
 */
function reEmbeddingDataset(row: any) {
  const datasetId = row.id
  datasetApi.putReEmbeddingDataset(datasetId).then(() => {})
}
|
||||
|
||||
/**
 * Open the sync dialog for the dataset shown in the given row.
 * @param row dataset row; only `row.id` is read
 */
function syncDataset(row: any) {
  const dialog = SyncWebDialogRef.value
  dialog.open(row.id)
}
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@
|
|||
<span class="mr-4">
|
||||
<el-tooltip effect="dark" content="重新向量化" placement="top">
|
||||
<el-button type="primary" text @click.stop="refreshDocument(row)">
|
||||
<el-icon><RefreshRight /></el-icon>
|
||||
<AppIcon iconName="app-document-refresh" style="font-size: 16px"></AppIcon>
|
||||
</el-button>
|
||||
</el-tooltip>
|
||||
</span>
|
||||
|
|
@ -174,13 +174,8 @@
|
|||
</span>
|
||||
</div>
|
||||
<div v-if="datasetDetail.type === '1'">
|
||||
<el-tooltip
|
||||
effect="dark"
|
||||
content="同步"
|
||||
placement="top"
|
||||
v-if="datasetDetail.type === '1'"
|
||||
>
|
||||
<el-button type="primary" text @click.stop="refreshDocument(row)">
|
||||
<el-tooltip effect="dark" content="同步" placement="top">
|
||||
<el-button type="primary" text @click.stop="syncDocument(row)">
|
||||
<el-icon><Refresh /></el-icon>
|
||||
</el-button>
|
||||
</el-tooltip>
|
||||
|
|
@ -191,6 +186,13 @@
|
|||
</el-button>
|
||||
<template #dropdown>
|
||||
<el-dropdown-menu>
|
||||
<el-dropdown-item @click="refreshDocument(row)">
|
||||
<AppIcon
|
||||
iconName="app-document-refresh"
|
||||
style="font-size: 16px"
|
||||
></AppIcon>
|
||||
重新向量化</el-dropdown-item
|
||||
>
|
||||
<el-dropdown-item icon="Setting" @click="settingDoc(row)"
|
||||
>设置</el-dropdown-item
|
||||
>
|
||||
|
|
@ -340,33 +342,33 @@ const closeInterval = () => {
|
|||
clearInterval(interval)
|
||||
}
|
||||
}
|
||||
function refreshDocument(row: any) {
|
||||
if (row.type === '1') {
|
||||
if (row.meta?.source_url) {
|
||||
MsgConfirm(`确认同步文档?`, `同步将删除已有数据重新获取新数据,请谨慎操作。`, {
|
||||
confirmButtonText: '同步',
|
||||
confirmButtonClass: 'danger'
|
||||
})
|
||||
.then(() => {
|
||||
documentApi.putDocumentRefresh(row.dataset_id, row.id).then(() => {
|
||||
getList()
|
||||
})
|
||||
})
|
||||
.catch(() => {})
|
||||
} else {
|
||||
MsgConfirm(`提示`, `无法同步,请先去设置文档 URL地址`, {
|
||||
confirmButtonText: '确认',
|
||||
type: 'warning'
|
||||
})
|
||||
.then(() => {})
|
||||
.catch(() => {})
|
||||
}
|
||||
} else {
|
||||
documentApi.putDocumentRefresh(row.dataset_id, row.id).then(() => {
|
||||
getList()
|
||||
|
||||
/**
 * Sync a document after the user confirms.
 *
 * If the row has no `meta.source_url`, only a warning dialog is shown.
 * Otherwise a confirmation dialog is shown; on confirm the sync API is
 * called and the list is reloaded once that call resolves.
 * @param row document row; reads `meta?.source_url`, `dataset_id` and `id`
 */
function syncDocument(row: any) {
  // Guard: nothing to sync from when no source URL has been configured.
  if (!row.meta?.source_url) {
    const warnOptions = {
      confirmButtonText: '确认',
      type: 'warning'
    }
    MsgConfirm(`提示`, `无法同步,请先去设置文档 URL地址`, warnOptions)
      .then(() => {})
      .catch(() => {})
    return
  }

  const confirmOptions = {
    confirmButtonText: '同步',
    confirmButtonClass: 'danger'
  }
  MsgConfirm(`确认同步文档?`, `同步将删除已有数据重新获取新数据,请谨慎操作。`, confirmOptions)
    .then(() => {
      // The inner promise is deliberately not returned, as in the original flow.
      documentApi.putDocumentSync(row.dataset_id, row.id).then(() => {
        getList()
      })
    })
    .catch(() => {})
}
|
||||
/**
 * Call the document refresh API for the given row, then reload the list
 * once the call resolves.
 * @param row document row; reads `dataset_id` and `id`
 */
function refreshDocument(row: any) {
  const pending = documentApi.putDocumentRefresh(row.dataset_id, row.id)
  pending.then(() => {
    getList()
  })
}
|
||||
|
||||
function rowClickHandle(row: any, column: any) {
|
||||
if (column && column.type === 'selection') {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,12 @@
|
|||
:toolbars="toolbars"
|
||||
style="height: 300px"
|
||||
@onUploadImg="onUploadImg"
|
||||
/>
|
||||
:footers="footers"
|
||||
>
|
||||
<template #defFooters>
|
||||
<span style="margin-left: -6px;">/ 4096</span>
|
||||
</template>
|
||||
</MarkdownEditor>
|
||||
<MdPreview
|
||||
v-else
|
||||
ref="editorRef"
|
||||
|
|
@ -76,6 +81,8 @@ const toolbars = [
|
|||
'htmlPreview'
|
||||
] as any[]
|
||||
|
||||
const footers = ['markdownTotal', 0, '=', 1, 'scrollSwitch']
|
||||
|
||||
const editorRef = ref()
|
||||
|
||||
const form = ref<any>({
|
||||
|
|
|
|||
Loading…
Reference in New Issue