feat: Knowledge base generation problem (#2760)
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
Typos Check / Spell Check with Typos (push) Waiting to run

This commit is contained in:
shaohuzhang1 2025-04-01 12:46:30 +08:00 committed by GitHub
parent 06867d33cb
commit 4b9cecd4d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 139 additions and 10 deletions

View File

@ -222,3 +222,26 @@ def get_embedding_model_id_by_dataset_id_list(dataset_id_list: List):
if len(dataset_list) == 0:
raise Exception(_('Knowledge base setting error, please reset the knowledge base'))
return str(dataset_list[0].embedding_mode_id)
class GenerateRelatedSerializer(ApiMixin, serializers.Serializer):
    """Request body of a "generate related" call.

    Fields:
        model_id: required UUID ("Model id").
        prompt: required string ("Prompt word").
        state_list: optional list of strings.
    """
    model_id = serializers.UUIDField(required=True, error_messages=ErrMessage.uuid(_('Model id')))
    # The prompt is free text, so it uses the char error messages rather than the uuid ones.
    prompt = serializers.CharField(required=True, error_messages=ErrMessage.char(_('Prompt word')))
    state_list = serializers.ListField(required=False, child=serializers.CharField(required=True),
                                       error_messages=ErrMessage.list("state list"))

    @staticmethod
    def get_request_body_api():
        """Return the OpenAPI schema that documents this serializer as a request body."""
        return openapi.Schema(
            type=openapi.TYPE_OBJECT,
            # Mirrors the required=True flags declared on the serializer fields above.
            required=['model_id', 'prompt'],
            properties={
                'model_id': openapi.Schema(type=openapi.TYPE_STRING,
                                           title=_('Model id'),
                                           description=_('Model id')),
                'prompt': openapi.Schema(type=openapi.TYPE_STRING, title=_('Prompt word'),
                                         description=_("Prompt word")),
                'state_list': openapi.Schema(type=openapi.TYPE_ARRAY,
                                             items=openapi.Schema(type=openapi.TYPE_STRING),
                                             title=_('state list'))
            }
        )

View File

@ -23,6 +23,7 @@ from django.contrib.postgres.fields import ArrayField
from django.core import validators
from django.db import transaction, models
from django.db.models import QuerySet
from django.db.models.functions import Reverse, Substr
from django.http import HttpResponse
from drf_yasg import openapi
from rest_framework import serializers
@ -42,9 +43,10 @@ from common.util.split_model import get_split_model
from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, ProblemParagraphMapping, TaskType, \
State, File, Image
from dataset.serializers.common_serializers import list_paragraph, MetaSerializer, ProblemParagraphManage, \
get_embedding_model_by_dataset_id, get_embedding_model_id_by_dataset_id, write_image, zip_dir
get_embedding_model_by_dataset_id, get_embedding_model_id_by_dataset_id, write_image, zip_dir, \
GenerateRelatedSerializer
from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer
from dataset.task import sync_web_dataset, sync_replace_web_dataset
from dataset.task import sync_web_dataset, sync_replace_web_dataset, generate_related_by_dataset_id
from embedding.models import SearchMode
from embedding.task import embedding_by_dataset, delete_embedding_by_dataset
from setting.models import AuthOperate, Model
@ -814,6 +816,31 @@ class DataSetSerializers(serializers.ModelSerializer):
except AlreadyQueued as e:
raise AppApiException(500, _('Failed to send the vectorization task, please try again later!'))
def generate_related(self, instance: Dict, with_valid=True):
    """Flag the dataset's documents and paragraphs as pending, then queue the generation task.

    :param instance: request payload; ``model_id``, ``prompt`` and the optional
        ``state_list`` are read from it.
    :param with_valid: when true, validate this serializer and ``instance`` before doing anything.
    :raises AppApiException: with code 500 when the task is reported as already queued.
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        GenerateRelatedSerializer(data=instance).is_valid(raise_exception=True)
    dataset_id = self.data.get('id')
    model_id = instance.get("model_id")
    prompt = instance.get("prompt")
    state_list = instance.get('state_list')

    # Every document of the dataset is moved to PENDING for the generate-problem task.
    ListenerManagement.update_status(QuerySet(Document).filter(dataset_id=dataset_id),
                                     TaskType.GENERATE_PROBLEM,
                                     State.PENDING)

    # The character at position TaskType.GENERATE_PROBLEM.value of the reversed status
    # string is compared against the requested states.
    paragraph_query = QuerySet(Paragraph).annotate(
        reversed_status=Reverse('status'),
        task_type_status=Substr('reversed_status', TaskType.GENERATE_PROBLEM.value, 1),
    ).filter(dataset_id=dataset_id)
    if state_list is not None:
        # state_list is optional in GenerateRelatedSerializer; an ``__in`` lookup cannot
        # take None, so the status restriction is only applied when a list was sent.
        # NOTE(review): an omitted state_list is treated as "no status restriction" -
        # confirm this matches how the downstream task interprets state_list=None.
        paragraph_query = paragraph_query.filter(task_type_status__in=state_list)
    ListenerManagement.update_status(paragraph_query.values('id'),
                                     TaskType.GENERATE_PROBLEM,
                                     State.PENDING)

    ListenerManagement.get_aggregation_document_status_by_dataset_id(dataset_id)()
    try:
        generate_related_by_dataset_id.delay(dataset_id, model_id, prompt, state_list)
    except AlreadyQueued as e:
        # The message text is kept unchanged because it is a translation msgid.
        raise AppApiException(
            500, _('Failed to send the vectorization task, please try again later!')) from e
def list_application(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)

View File

@ -64,6 +64,17 @@ def get_is_the_task_interrupted(document_id):
return is_the_task_interrupted
@celery_app.task(base=QueueOnce, once={'keys': ['dataset_id']},
                 name='celery:generate_related_by_dataset')
def generate_related_by_dataset_id(dataset_id, model_id, prompt, state_list=None):
    """Queue one ``generate_related_by_document_id`` task per document of a dataset.

    :param dataset_id: id used to select the documents.
    :param model_id: forwarded unchanged to each per-document task.
    :param prompt: forwarded unchanged to each per-document task.
    :param state_list: forwarded unchanged to each per-document task.

    A failure to queue one document is logged and does not stop the remaining
    documents from being queued.
    """
    import logging  # local import so the block does not depend on the file's import header

    logger = logging.getLogger(__name__)
    for document in QuerySet(Document).filter(dataset_id=dataset_id):
        try:
            generate_related_by_document_id.delay(document.id, model_id, prompt, state_list)
        except Exception:
            # Best effort: keep going, but leave a trace instead of swallowing the error.
            logger.warning('Failed to queue generate_related for document %s of dataset %s',
                           document.id, dataset_id, exc_info=True)
@celery_app.task(base=QueueOnce, once={'keys': ['document_id']},
name='celery:generate_related_by_document')
def generate_related_by_document_id(document_id, model_id, prompt, state_list=None):

View File

@ -11,6 +11,8 @@ urlpatterns = [
path('dataset/<str:dataset_id>/export', views.Dataset.Export.as_view(), name="export"),
path('dataset/<str:dataset_id>/export_zip', views.Dataset.ExportZip.as_view(), name="export_zip"),
path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"),
path('dataset/<str:dataset_id>/generate_related', views.Dataset.GenerateRelated.as_view(),
name="dataset_generate_related"),
path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()),
path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"),
path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()),

View File

@ -21,6 +21,7 @@ from common.log.log import log
from common.response import result
from common.response.result import get_page_request_params, get_page_api_response, get_api_response
from common.swagger_api.common_api import CommonApi
from dataset.serializers.common_serializers import GenerateRelatedSerializer
from dataset.serializers.dataset_serializers import DataSetSerializers
from dataset.views.common import get_dataset_operation_object
from setting.serializers.provider_serializers import ModelSerializer
@ -173,6 +174,23 @@ class Dataset(APIView):
return result.success(
DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding())
class GenerateRelated(APIView):
    """Endpoint behind ``dataset/<dataset_id>/generate_related``; handles PUT only."""
    authentication_classes = [TokenAuth]

    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(
        operation_summary=_('Generate related'),
        operation_id=_('Generate related'),
        manual_parameters=DataSetSerializers.Operate.get_request_params_api(),
        request_body=GenerateRelatedSerializer.get_request_body_api(),
        tags=[_('Knowledge Base')],
    )
    @log(
        menu='document',
        operate="Generate related documents",
        get_operation_object=lambda r, keywords: get_dataset_operation_object(keywords.get('dataset_id')),
    )
    def put(self, request: Request, dataset_id: str):
        """Run ``generate_related`` for ``dataset_id`` with the request body and wrap the result."""
        operate_serializer = DataSetSerializers.Operate(
            data={'id': dataset_id, 'user_id': request.user.id})
        outcome = operate_serializer.generate_related(request.data)
        return result.success(outcome)
class Export(APIView):
authentication_classes = [TokenAuth]

View File

@ -7487,4 +7487,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
msgstr ""
msgid "Field: {name} No value set"
msgstr ""
msgid "Generate related"
msgstr ""

View File

@ -7650,4 +7650,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
msgstr "字段: {name} 类型: {_type} 值: {value} 不支持的类型"
msgid "Field: {name} No value set"
msgstr "字段: {name} 未设置值"
msgstr "字段: {name} 未设置值"
msgid "Generate related"
msgstr "生成问题"

View File

@ -7660,4 +7660,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
msgstr "欄位: {name} 類型: {_type} 值: {value} 不支持的類型"
msgid "Field: {name} No value set"
msgstr "欄位: {name} 未設定值"
msgstr "欄位: {name} 未設定值"
msgid "Generate related"
msgstr "生成問題"

View File

@ -277,6 +277,20 @@ const importLarkDocument: (
) => Promise<Result<Array<any>>> = (dataset_id, data, loading) => {
return post(`${prefix}/lark/${dataset_id}/import`, data, null, loading)
}
/**
 * Send a PUT request to `${prefix}/${dataset_id}/generate_related`.
 *
 * @param dataset_id id placed in the request path
 * @param data       request body, passed to `put` unchanged
 * @param loading    optional loading ref, passed to `put` unchanged
 * @returns the promise returned by `put`
 */
const generateRelated: (
  dataset_id: string,
  data: any,
  loading?: Ref<boolean>
) => Promise<Result<Array<any>>> = (dataset_id, data, loading) => {
  const url = `${prefix}/${dataset_id}/generate_related`
  return put(url, data, null, loading)
}
export default {
getDataset,
@ -297,5 +311,6 @@ export default {
postLarkDataset,
getLarkDocumentList,
importLarkDocument,
putLarkDataset
putLarkDataset,
generateRelated
}

View File

@ -51,7 +51,7 @@
/>
</el-form-item>
<el-form-item
v-if="apiType === 'document'"
v-if="['document', 'dataset'].includes(apiType)"
:label="$t('components.selectParagraph.title')"
prop="state"
>
@ -107,6 +107,7 @@ const stateMap = {
error: ['0', '1', '3', '4', '5', 'n']
}
const FormRef = ref()
const datasetId = ref<string>()
const userId = user.userInfo?.id as string
const form = ref(prompt.get(userId))
const rules = reactive({
@ -133,7 +134,8 @@ watch(dialogVisible, (bool) => {
}
})
const open = (ids: string[], type: string) => {
const open = (ids: string[], type: string, _datasetId?: string) => {
datasetId.value = _datasetId
getModel()
idList.value = ids
apiType.value = type
@ -169,6 +171,15 @@ const submitHandle = async (formEl: FormInstance) => {
emit('refresh')
dialogVisible.value = false
})
} else if (apiType.value === 'dataset') {
const data = {
...form.value,
state_list: stateMap[state.value]
}
datasetApi.generateRelated(id ? id : datasetId.value, data, loading).then(() => {
MsgSuccess(t('views.document.generateQuestion.successMessage'))
dialogVisible.value = false
})
}
}
})
@ -177,7 +188,7 @@ const submitHandle = async (formEl: FormInstance) => {
function getModel() {
loading.value = true
datasetApi
.getDatasetModel(id)
.getDatasetModel(id ? id : datasetId.value)
.then((res: any) => {
modelOptions.value = groupBy(res?.data, 'provider')
loading.value = false

View File

@ -127,6 +127,7 @@
v-if="item.type === '1'"
>{{ $t('views.dataset.setting.sync') }}</el-dropdown-item
>
<el-dropdown-item @click="reEmbeddingDataset(item)">
<AppIcon
iconName="app-document-refresh"
@ -134,6 +135,11 @@
></AppIcon>
{{ $t('views.dataset.setting.vectorization') }}</el-dropdown-item
>
<el-dropdown-item
icon="Connection"
@click.stop="openGenerateDialog(item)"
>{{ $t('views.document.generateQuestion.title') }}</el-dropdown-item
>
<el-dropdown-item
icon="Setting"
@click.stop="router.push({ path: `/dataset/${item.id}/setting` })"
@ -165,10 +171,11 @@
</div>
<SyncWebDialog ref="SyncWebDialogRef" @refresh="refresh" />
<CreateDatasetDialog ref="CreateDatasetDialogRef" />
<GenerateRelatedDialog ref="GenerateRelatedDialogRef" />
</div>
</template>
<script setup lang="ts">
import { ref, onMounted, reactive, computed } from 'vue'
import { ref, onMounted, reactive } from 'vue'
import SyncWebDialog from '@/views/dataset/component/SyncWebDialog.vue'
import CreateDatasetDialog from './component/CreateDatasetDialog.vue'
import datasetApi from '@/api/dataset'
@ -179,7 +186,7 @@ import { ValidType, ValidCount } from '@/enums/common'
import { t } from '@/locales'
import useStore from '@/stores'
import applicationApi from '@/api/application'
import GenerateRelatedDialog from '@/components/generate-related-dialog/index.vue'
const { user, common } = useStore()
const router = useRouter()
@ -192,6 +199,12 @@ const paginationConfig = reactive({
page_size: 30,
total: 0
})
const GenerateRelatedDialogRef = ref<InstanceType<typeof GenerateRelatedDialog>>()
/**
 * Open the generate-related dialog in 'dataset' mode for the given row.
 * Does nothing while the dialog ref is still unset.
 */
function openGenerateDialog(row: any) {
  GenerateRelatedDialogRef.value?.open([], 'dataset', row.id)
}
const searchValue = ref('')