From a3af104ef0921b9870e366800da763e374f20419 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Fri, 24 May 2024 11:27:59 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=9F=A5=E8=AF=86=E5=BA=93=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E9=87=8D=E6=96=B0=E5=90=91=E9=87=8F=E5=8C=96=E5=8A=9F?= =?UTF-8?q?=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/common/event/listener_manage.py | 2 + .../serializers/dataset_serializers.py | 5 ++ .../serializers/document_serializers.py | 13 +--- apps/dataset/urls.py | 2 + apps/dataset/views/dataset.py | 15 +++++ apps/dataset/views/document.py | 18 +++++ ui/src/api/dataset.ts | 14 ++++ ui/src/api/document.ts | 22 +++++-- ui/src/components/icons/index.ts | 37 +++++++++++ ui/src/components/markdown-editor/index.vue | 6 +- ui/src/views/dataset/index.vue | 13 +++- ui/src/views/document/index.vue | 66 ++++++++++--------- .../paragraph/component/ParagraphForm.vue | 9 ++- 13 files changed, 169 insertions(+), 53 deletions(-) diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py index 415d20562..ea7c9b508 100644 --- a/apps/common/event/listener_manage.py +++ b/apps/common/event/listener_manage.py @@ -123,6 +123,8 @@ class ListenerManagement: :return: None """ max_kb.info(f"开始--->向量化文档:{document_id}") + QuerySet(Document).filter(id=document_id).update(**{'status': Status.embedding}) + QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': Status.embedding}) status = Status.success try: data_list = native_search( diff --git a/apps/dataset/serializers/dataset_serializers.py b/apps/dataset/serializers/dataset_serializers.py index 61d4b1a3a..028b3e62a 100644 --- a/apps/dataset/serializers/dataset_serializers.py +++ b/apps/dataset/serializers/dataset_serializers.py @@ -680,6 +680,11 @@ class DataSetSerializers(serializers.ModelSerializer): ListenerManagement.delete_embedding_by_dataset_signal.send(self.data.get('id')) return True + def re_embedding(self, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + ListenerManagement.embedding_by_dataset_signal.send(self.data.get('id')) + def list_application(self, with_valid=True): if with_valid: self.is_valid(raise_exception=True) diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index 5d6518ad5..e97b2279c 100644 --- a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -448,18 +448,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): if with_valid: self.is_valid(raise_exception=True) document_id = self.data.get("document_id") - document = QuerySet(Document).filter(id=document_id).first() - if document.type == Type.web: - # 异步同步 - work_thread_pool.submit(lambda x: DocumentSerializers.Sync(data={'document_id': document_id}).sync(), - {}) - - else: - if document.status != Status.embedding.value: - document.status = Status.embedding - document.save() - ListenerManagement.embedding_by_document_signal.send(document_id) - return True + ListenerManagement.embedding_by_document_signal.send(document_id) @transaction.atomic def delete(self): diff --git a/apps/dataset/urls.py b/apps/dataset/urls.py index b9d1bd431..dd06ba254 100644 --- a/apps/dataset/urls.py +++ b/apps/dataset/urls.py @@ -8,6 +8,7 @@ urlpatterns = [ path('dataset/web', views.Dataset.CreateWebDataset.as_view()), path('dataset/qa', views.Dataset.CreateQADataset.as_view()), path('dataset/', views.Dataset.Operate.as_view(), name="dataset_key"), + path('dataset//re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"), path('dataset//application', views.Dataset.Application.as_view()), path('dataset//', views.Dataset.Page.as_view(), name="dataset"), path('dataset//sync_web', views.Dataset.SyncWeb.as_view()), @@ -26,6 +27,7 @@ urlpatterns = [ path('dataset/document/split_pattern', views.Document.SplitPattern.as_view(), name="document_operate"), path('dataset//document/migrate/', views.Document.Migrate.as_view()), + path('dataset//document//sync', views.Document.SyncWeb.as_view()), path('dataset//document//refresh', views.Document.Refresh.as_view()), path('dataset//document//paragraph', views.Paragraph.as_view()), path( diff --git a/apps/dataset/views/dataset.py b/apps/dataset/views/dataset.py index a864f089c..92e3c1451 100644 --- a/apps/dataset/views/dataset.py +++ b/apps/dataset/views/dataset.py @@ -137,6 +137,21 @@ class Dataset(APIView): 'search_mode': request.query_params.get('search_mode')}).hit_test( )) + class Embedding(APIView): + authentication_classes = [TokenAuth] + + @action(methods="PUT", detail=False) + @swagger_auto_schema(operation_summary="重新向量化", operation_id="重新向量化", + manual_parameters=DataSetSerializers.Operate.get_request_params_api(), + responses=result.get_default_response(), + tags=["知识库"] + ) + @has_permissions(lambda r, keywords: Permission(group=Group.DATASET, operate=Operate.MANAGE, + dynamic_tag=keywords.get('dataset_id'))) + def put(self, request: Request, dataset_id: str): + return result.success( + DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding()) + class Operate(APIView): authentication_classes = [TokenAuth] diff --git a/apps/dataset/views/document.py b/apps/dataset/views/document.py index 443f650d9..90849ce7e 100644 --- a/apps/dataset/views/document.py +++ b/apps/dataset/views/document.py @@ -168,6 +168,24 @@ class Document(APIView): def delete(self, request: Request, dataset_id: str): return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_delete(request.data)) + class SyncWeb(APIView): + authentication_classes = [TokenAuth] + + @action(methods=['PUT'], detail=False) + @swagger_auto_schema(operation_summary="同步web站点类型", + operation_id="同步web站点类型", + manual_parameters=DocumentSerializers.Operate.get_request_params_api(), + responses=result.get_default_response(), + tags=["知识库/文档"] + ) + @has_permissions( + lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE, + dynamic_tag=k.get('dataset_id'))) + def put(self, request: Request, dataset_id: str, document_id: str): + return result.success( + DocumentSerializers.Sync(data={'document_id': document_id, 'dataset_id': dataset_id}).sync( + )) + class Refresh(APIView): authentication_classes = [TokenAuth] diff --git a/ui/src/api/dataset.ts b/ui/src/api/dataset.ts index 63c93f1a4..0829bb425 100644 --- a/ui/src/api/dataset.ts +++ b/ui/src/api/dataset.ts @@ -176,6 +176,17 @@ const putSyncWebDataset: ( return put(`${prefix}/${dataset_id}/sync_web`, undefined, { sync_type }, loading) } +/** + * 重新向量化知识库 + * @param 参数 dataset_id + */ +const putReEmbeddingDataset: ( + dataset_id: string, + loading?: Ref +) => Promise> = (dataset_id, loading) => { + return put(`${prefix}/${dataset_id}/re_embedding`, undefined, undefined, loading) +} + export default { getDataset, getAllDataset, @@ -186,6 +197,9 @@ export default { listUsableApplication, getDatasetHitTest, postWebDataset, + putSyncWebDataset, + putReEmbeddingDataset, postQADataset, putSyncWebDataset + } diff --git a/ui/src/api/document.ts b/ui/src/api/document.ts index 647fab34a..413c1f6c9 100644 --- a/ui/src/api/document.ts +++ b/ui/src/api/document.ts @@ -137,12 +137,8 @@ const getDocumentDetail: (dataset_id: string, document_id: string) => Promise +) => Promise> = (dataset_id, document_id, loading) => { + return put(`${prefix}/${dataset_id}/document/${document_id}/sync`, undefined, undefined, loading) +} + /** * 批量同步文档 * @param 参数 dataset_id, @@ -258,6 +267,7 @@ export default { getDocumentDetail, listSplitPattern, putDocumentRefresh, + putDocumentSync, delMulSyncDocument, postWebDocument, putMigrateMulDocument, diff --git a/ui/src/components/icons/index.ts b/ui/src/components/icons/index.ts index 6d141d364..1965fe3ba 100644 --- a/ui/src/components/icons/index.ts +++ b/ui/src/components/icons/index.ts @@ -873,5 +873,42 @@ export const iconMap: any = { ) ]) } + }, + 'app-document-refresh': { + iconReader: () => { + return h('i', [ + h( + 'svg', + { + style: { height: '100%', width: '100%' }, + viewBox: '0 0 1024 1024', + version: '1.1', + xmlns: 'http://www.w3.org/2000/svg' + }, + [ + h('path', { + d: 'M494.592 165.12l-320 208a32 32 0 0 0-14.592 26.88v224a32 32 0 0 0 14.592 26.88l320 208a32 32 0 0 0 34.88 0l320-208a32 32 0 0 0 14.528-26.88v-224a32 32 0 0 0-14.528-26.88l-320-208a32 32 0 0 0-34.88 0zM224 417.408L512 230.144l288 187.2V606.72L512 793.856 224 606.656V417.28z', + fill: 'currentColor' + }), + h('path', { + d: 'M512 592a32 32 0 0 0-32 32V832a32 32 0 0 0 64 0V624a32 32 0 0 0-32-32z', + fill: 'currentColor' + }), + h('path', { + d: 'M165.76 381.632a32 32 0 0 0 7.872 44.608l320 224a32 32 0 0 0 36.736 0l320-224a32 32 0 0 0-36.736-52.48L512 584.96l-301.632-211.2a32 32 0 0 0-44.608 7.872z', + fill: 'currentColor' + }), + h('path', { + d: 'M493.632 373.76a32 32 0 0 1 36.736 0l320 224a32 32 0 0 1-36.736 52.48L512 439.04l-301.632 211.2a32 32 0 1 1-36.736-52.48l320-224z', + fill: 'currentColor' + }), + h('path', { + d: 'M512 160a32 32 0 0 0-32 32v208a32 32 0 0 0 64 0V192a32 32 0 0 0-32-32z', + fill: 'currentColor' + }) + ] + ) + ]) + } } } diff --git a/ui/src/components/markdown-editor/index.vue b/ui/src/components/markdown-editor/index.vue index c5efbc2c7..d269662e9 100644 --- a/ui/src/components/markdown-editor/index.vue +++ b/ui/src/components/markdown-editor/index.vue @@ -1,5 +1,9 @@