mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
feat: Knowledge base generation problem (#2760)
This commit is contained in:
parent
06867d33cb
commit
4b9cecd4d1
|
|
@ -222,3 +222,26 @@ def get_embedding_model_id_by_dataset_id_list(dataset_id_list: List):
|
|||
if len(dataset_list) == 0:
|
||||
raise Exception(_('Knowledge base setting error, please reset the knowledge base'))
|
||||
return str(dataset_list[0].embedding_mode_id)
|
||||
|
||||
|
||||
class GenerateRelatedSerializer(ApiMixin, serializers.Serializer):
    """Validate the request body of a "generate related" call.

    Fields:
        model_id: UUID of the model to use (required).
        prompt: prompt text (required).
        state_list: optional list of state strings.
    """
    model_id = serializers.UUIDField(required=True, error_messages=ErrMessage.uuid(_('Model id')))
    # ``prompt`` is a CharField, so it needs the char message set rather than
    # the UUID one that was copied from ``model_id``.
    prompt = serializers.CharField(required=True, error_messages=ErrMessage.char(_('Prompt word')))
    state_list = serializers.ListField(required=False, child=serializers.CharField(required=True),
                                       error_messages=ErrMessage.list("state list"))

    @staticmethod
    def get_request_body_api():
        """Return the OpenAPI schema describing the request body.

        :return: an ``openapi.Schema`` object with ``model_id``, ``prompt``
            and ``state_list`` properties.
        """
        return openapi.Schema(
            type=openapi.TYPE_OBJECT,
            # Keep the documented contract in line with the serializer, where
            # these two fields are declared with ``required=True``.
            required=['model_id', 'prompt'],
            properties={
                'model_id': openapi.Schema(type=openapi.TYPE_STRING,
                                           title=_('Model id'),
                                           description=_('Model id')),
                'prompt': openapi.Schema(type=openapi.TYPE_STRING, title=_('Prompt word'),
                                         description=_("Prompt word")),
                'state_list': openapi.Schema(type=openapi.TYPE_ARRAY,
                                             items=openapi.Schema(type=openapi.TYPE_STRING),
                                             title=_('state list'))
            }
        )
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from django.contrib.postgres.fields import ArrayField
|
|||
from django.core import validators
|
||||
from django.db import transaction, models
|
||||
from django.db.models import QuerySet
|
||||
from django.db.models.functions import Reverse, Substr
|
||||
from django.http import HttpResponse
|
||||
from drf_yasg import openapi
|
||||
from rest_framework import serializers
|
||||
|
|
@ -42,9 +43,10 @@ from common.util.split_model import get_split_model
|
|||
from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, ProblemParagraphMapping, TaskType, \
|
||||
State, File, Image
|
||||
from dataset.serializers.common_serializers import list_paragraph, MetaSerializer, ProblemParagraphManage, \
|
||||
get_embedding_model_by_dataset_id, get_embedding_model_id_by_dataset_id, write_image, zip_dir
|
||||
get_embedding_model_by_dataset_id, get_embedding_model_id_by_dataset_id, write_image, zip_dir, \
|
||||
GenerateRelatedSerializer
|
||||
from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer
|
||||
from dataset.task import sync_web_dataset, sync_replace_web_dataset
|
||||
from dataset.task import sync_web_dataset, sync_replace_web_dataset, generate_related_by_dataset_id
|
||||
from embedding.models import SearchMode
|
||||
from embedding.task import embedding_by_dataset, delete_embedding_by_dataset
|
||||
from setting.models import AuthOperate, Model
|
||||
|
|
@ -814,6 +816,31 @@ class DataSetSerializers(serializers.ModelSerializer):
|
|||
except AlreadyQueued as e:
|
||||
raise AppApiException(500, _('Failed to send the vectorization task, please try again later!'))
|
||||
|
||||
def generate_related(self, instance: Dict, with_valid=True):
    """Flag a dataset's documents and paragraphs as pending, then queue generation.

    :param instance: request payload; the keys read here are ``model_id``,
        ``prompt`` and the optional ``state_list``.
    :param with_valid: when true, validate this serializer and the payload
        before doing anything else.
    :raises AppApiException: with code 500 when the task is already queued.
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        GenerateRelatedSerializer(data=instance).is_valid(raise_exception=True)
    dataset_id = self.data.get('id')
    model_id = instance.get("model_id")
    prompt = instance.get("prompt")
    state_list = instance.get('state_list')
    ListenerManagement.update_status(QuerySet(Document).filter(dataset_id=dataset_id),
                                     TaskType.GENERATE_PROBLEM,
                                     State.PENDING)
    if state_list is None:
        # ``state_list`` is optional in GenerateRelatedSerializer, and
        # ``__in=None`` is not a usable lookup, so no state filter is applied.
        # NOTE(review): assumes an omitted state_list means "every paragraph
        # of the dataset" - confirm against the queued task's own default.
        paragraph_query_set = QuerySet(Paragraph).filter(dataset_id=dataset_id)
    else:
        # The status string is reversed so that the character at position
        # TaskType.GENERATE_PROBLEM.value can be picked out with Substr and
        # compared against the requested states.
        paragraph_query_set = QuerySet(Paragraph).annotate(
            reversed_status=Reverse('status'),
            task_type_status=Substr('reversed_status', TaskType.GENERATE_PROBLEM.value,
                                    1),
        ).filter(task_type_status__in=state_list, dataset_id=dataset_id)
    ListenerManagement.update_status(paragraph_query_set.values('id'),
                                     TaskType.GENERATE_PROBLEM,
                                     State.PENDING)
    ListenerManagement.get_aggregation_document_status_by_dataset_id(dataset_id)()
    try:
        generate_related_by_dataset_id.delay(dataset_id, model_id, prompt, state_list)
    except AlreadyQueued as e:
        # NOTE(review): the message text still talks about "vectorization";
        # it is left unchanged so the existing msgid keeps matching.
        raise AppApiException(
            500, _('Failed to send the vectorization task, please try again later!')) from e
|
||||
|
||||
def list_application(self, with_valid=True):
|
||||
if with_valid:
|
||||
self.is_valid(raise_exception=True)
|
||||
|
|
|
|||
|
|
@ -64,6 +64,17 @@ def get_is_the_task_interrupted(document_id):
|
|||
return is_the_task_interrupted
|
||||
|
||||
|
||||
@celery_app.task(base=QueueOnce, once={'keys': ['dataset_id']},
                 name='celery:generate_related_by_dataset')
def generate_related_by_dataset_id(dataset_id, model_id, prompt, state_list=None):
    """Queue one ``generate_related_by_document_id`` task per document of a dataset.

    :param dataset_id: id used to select the dataset's documents.
    :param model_id: forwarded unchanged to each per-document task.
    :param prompt: forwarded unchanged to each per-document task.
    :param state_list: forwarded unchanged to each per-document task.
    """
    # Local import: only this block is visible, so the module-level import
    # section is not touched.
    import logging

    document_list = QuerySet(Document).filter(dataset_id=dataset_id)
    for document in document_list:
        try:
            generate_related_by_document_id.delay(document.id, model_id, prompt, state_list)
        except Exception:
            # Best effort: a document that cannot be queued must not stop the
            # remaining ones, but the failure is recorded instead of vanishing.
            logging.getLogger(__name__).warning(
                'Failed to queue generate_related task for document %s',
                document.id, exc_info=True)
|
||||
|
||||
|
||||
@celery_app.task(base=QueueOnce, once={'keys': ['document_id']},
|
||||
name='celery:generate_related_by_document')
|
||||
def generate_related_by_document_id(document_id, model_id, prompt, state_list=None):
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ urlpatterns = [
|
|||
path('dataset/<str:dataset_id>/export', views.Dataset.Export.as_view(), name="export"),
|
||||
path('dataset/<str:dataset_id>/export_zip', views.Dataset.ExportZip.as_view(), name="export_zip"),
|
||||
path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"),
|
||||
path('dataset/<str:dataset_id>/generate_related', views.Dataset.GenerateRelated.as_view(),
|
||||
name="dataset_generate_related"),
|
||||
path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()),
|
||||
path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"),
|
||||
path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()),
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from common.log.log import log
|
|||
from common.response import result
|
||||
from common.response.result import get_page_request_params, get_page_api_response, get_api_response
|
||||
from common.swagger_api.common_api import CommonApi
|
||||
from dataset.serializers.common_serializers import GenerateRelatedSerializer
|
||||
from dataset.serializers.dataset_serializers import DataSetSerializers
|
||||
from dataset.views.common import get_dataset_operation_object
|
||||
from setting.serializers.provider_serializers import ModelSerializer
|
||||
|
|
@ -173,6 +174,23 @@ class Dataset(APIView):
|
|||
return result.success(
|
||||
DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding())
|
||||
|
||||
class GenerateRelated(APIView):
    # PUT endpoint that hands the request body to
    # DataSetSerializers.Operate.generate_related for the given dataset.
    # NOTE(review): only token authentication is declared here; no separate
    # permission decorator is applied - confirm that this is intended.
    authentication_classes = [TokenAuth]

    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary=_('Generate related'), operation_id=_('Generate related'),
                         manual_parameters=DataSetSerializers.Operate.get_request_params_api(),
                         request_body=GenerateRelatedSerializer.get_request_body_api(),
                         tags=[_('Knowledge Base')]
                         )
    @log(menu='document', operate="Generate related documents",
         get_operation_object=lambda r, keywords: get_dataset_operation_object(keywords.get('dataset_id'))
         )
    def put(self, request: Request, dataset_id: str):
        # Build the operate serializer for this dataset/user, run the
        # generation request, and wrap whatever it returns in a success result.
        operate_serializer = DataSetSerializers.Operate(
            data={'id': dataset_id, 'user_id': request.user.id})
        outcome = operate_serializer.generate_related(request.data)
        return result.success(outcome)
|
||||
|
||||
class Export(APIView):
|
||||
authentication_classes = [TokenAuth]
|
||||
|
||||
|
|
|
|||
|
|
@ -7487,4 +7487,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
|
|||
msgstr ""
|
||||
|
||||
msgid "Field: {name} No value set"
|
||||
msgstr ""
|
||||
|
||||
msgid "Generate related"
|
||||
msgstr ""
|
||||
|
|
@ -7650,4 +7650,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
|
|||
msgstr "字段: {name} 类型: {_type} 值: {value} 不支持的类型"
|
||||
|
||||
msgid "Field: {name} No value set"
|
||||
msgstr "字段: {name} 未设置值"
|
||||
msgstr "字段: {name} 未设置值"
|
||||
|
||||
msgid "Generate related"
|
||||
msgstr "生成问题"
|
||||
|
|
@ -7660,4 +7660,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
|
|||
msgstr "欄位: {name} 類型: {_type} 值: {value} 不支持的類型"
|
||||
|
||||
msgid "Field: {name} No value set"
|
||||
msgstr "欄位: {name} 未設定值"
|
||||
msgstr "欄位: {name} 未設定值"
|
||||
|
||||
msgid "Generate related"
|
||||
msgstr "生成問題"
|
||||
|
|
@ -277,6 +277,20 @@ const importLarkDocument: (
|
|||
) => Promise<Result<Array<any>>> = (dataset_id, data, loading) => {
|
||||
return post(`${prefix}/lark/${dataset_id}/import`, data, null, loading)
|
||||
}
|
||||
/**
 * Generate related questions for a knowledge base.
 * @param dataset_id knowledge base id
 * @param data request body sent with the PUT request
 * @param loading optional loading flag forwarded to the request helper
 * @returns the promise returned by `put`
 */
const generateRelated: (
  dataset_id: string,
  data: any,
  loading?: Ref<boolean>
) => Promise<Result<Array<any>>> = (datasetId, body, loadingRef) =>
  put(`${prefix}/${datasetId}/generate_related`, body, null, loadingRef)
|
||||
|
||||
export default {
|
||||
getDataset,
|
||||
|
|
@ -297,5 +311,6 @@ export default {
|
|||
postLarkDataset,
|
||||
getLarkDocumentList,
|
||||
importLarkDocument,
|
||||
putLarkDataset
|
||||
putLarkDataset,
|
||||
generateRelated
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@
|
|||
/>
|
||||
</el-form-item>
|
||||
<el-form-item
|
||||
v-if="apiType === 'document'"
|
||||
v-if="['document', 'dataset'].includes(apiType)"
|
||||
:label="$t('components.selectParagraph.title')"
|
||||
prop="state"
|
||||
>
|
||||
|
|
@ -107,6 +107,7 @@ const stateMap = {
|
|||
error: ['0', '1', '3', '4', '5', 'n']
|
||||
}
|
||||
const FormRef = ref()
|
||||
const datasetId = ref<string>()
|
||||
const userId = user.userInfo?.id as string
|
||||
const form = ref(prompt.get(userId))
|
||||
const rules = reactive({
|
||||
|
|
@ -133,7 +134,8 @@ watch(dialogVisible, (bool) => {
|
|||
}
|
||||
})
|
||||
|
||||
const open = (ids: string[], type: string) => {
|
||||
const open = (ids: string[], type: string, _datasetId?: string) => {
|
||||
datasetId.value = _datasetId
|
||||
getModel()
|
||||
idList.value = ids
|
||||
apiType.value = type
|
||||
|
|
@ -169,6 +171,15 @@ const submitHandle = async (formEl: FormInstance) => {
|
|||
emit('refresh')
|
||||
dialogVisible.value = false
|
||||
})
|
||||
} else if (apiType.value === 'dataset') {
|
||||
const data = {
|
||||
...form.value,
|
||||
state_list: stateMap[state.value]
|
||||
}
|
||||
datasetApi.generateRelated(id ? id : datasetId.value, data, loading).then(() => {
|
||||
MsgSuccess(t('views.document.generateQuestion.successMessage'))
|
||||
dialogVisible.value = false
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
|
|
@ -177,7 +188,7 @@ const submitHandle = async (formEl: FormInstance) => {
|
|||
function getModel() {
|
||||
loading.value = true
|
||||
datasetApi
|
||||
.getDatasetModel(id)
|
||||
.getDatasetModel(id ? id : datasetId.value)
|
||||
.then((res: any) => {
|
||||
modelOptions.value = groupBy(res?.data, 'provider')
|
||||
loading.value = false
|
||||
|
|
|
|||
|
|
@ -127,6 +127,7 @@
|
|||
v-if="item.type === '1'"
|
||||
>{{ $t('views.dataset.setting.sync') }}</el-dropdown-item
|
||||
>
|
||||
|
||||
<el-dropdown-item @click="reEmbeddingDataset(item)">
|
||||
<AppIcon
|
||||
iconName="app-document-refresh"
|
||||
|
|
@ -134,6 +135,11 @@
|
|||
></AppIcon>
|
||||
{{ $t('views.dataset.setting.vectorization') }}</el-dropdown-item
|
||||
>
|
||||
<el-dropdown-item
|
||||
icon="Connection"
|
||||
@click.stop="openGenerateDialog(item)"
|
||||
>{{ $t('views.document.generateQuestion.title') }}</el-dropdown-item
|
||||
>
|
||||
<el-dropdown-item
|
||||
icon="Setting"
|
||||
@click.stop="router.push({ path: `/dataset/${item.id}/setting` })"
|
||||
|
|
@ -165,10 +171,11 @@
|
|||
</div>
|
||||
<SyncWebDialog ref="SyncWebDialogRef" @refresh="refresh" />
|
||||
<CreateDatasetDialog ref="CreateDatasetDialogRef" />
|
||||
<GenerateRelatedDialog ref="GenerateRelatedDialogRef" />
|
||||
</div>
|
||||
</template>
|
||||
<script setup lang="ts">
|
||||
import { ref, onMounted, reactive, computed } from 'vue'
|
||||
import { ref, onMounted, reactive } from 'vue'
|
||||
import SyncWebDialog from '@/views/dataset/component/SyncWebDialog.vue'
|
||||
import CreateDatasetDialog from './component/CreateDatasetDialog.vue'
|
||||
import datasetApi from '@/api/dataset'
|
||||
|
|
@ -179,7 +186,7 @@ import { ValidType, ValidCount } from '@/enums/common'
|
|||
import { t } from '@/locales'
|
||||
import useStore from '@/stores'
|
||||
import applicationApi from '@/api/application'
|
||||
|
||||
import GenerateRelatedDialog from '@/components/generate-related-dialog/index.vue'
|
||||
const { user, common } = useStore()
|
||||
const router = useRouter()
|
||||
|
||||
|
|
@ -192,6 +199,12 @@ const paginationConfig = reactive({
|
|||
page_size: 30,
|
||||
total: 0
|
||||
})
|
||||
const GenerateRelatedDialogRef = ref<InstanceType<typeof GenerateRelatedDialog>>()
|
||||
// Open the generate-related dialog in 'dataset' mode for the clicked row.
// Nothing happens while the dialog component ref is not available yet.
function openGenerateDialog(row: any) {
  const dialog = GenerateRelatedDialogRef.value
  if (!dialog) {
    return
  }
  dialog.open([], 'dataset', row.id)
}
|
||||
|
||||
const searchValue = ref('')
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue