feat: 直接回答支持设置相似度值(#371)

This commit is contained in:
shaohuzhang1 2024-05-08 17:31:56 +08:00 committed by GitHub
parent 267be441e3
commit 4da8b1b0d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 120 additions and 129 deletions

View File

@ -19,7 +19,7 @@ class ParagraphPipelineModel:
def __init__(self, _id: str, document_id: str, dataset_id: str, content: str, title: str, status: str,
is_active: bool, comprehensive_score: float, similarity: float, dataset_name: str, document_name: str,
hit_handling_method: str):
hit_handling_method: str, directly_return_similarity: float):
self.id = _id
self.document_id = document_id
self.dataset_id = dataset_id
@ -32,6 +32,7 @@ class ParagraphPipelineModel:
self.dataset_name = dataset_name
self.document_name = document_name
self.hit_handling_method = hit_handling_method
self.directly_return_similarity = directly_return_similarity
def to_dict(self):
return {
@ -56,6 +57,7 @@ class ParagraphPipelineModel:
self.document_name = None
self.dataset_name = None
self.hit_handling_method = None
self.directly_return_similarity = 0.9
def add_paragraph(self, paragraph):
if isinstance(paragraph, Paragraph):
@ -83,6 +85,10 @@ class ParagraphPipelineModel:
self.hit_handling_method = hit_handling_method
return self
# Builder-style setter: stores the similarity value that a paragraph's score is
# compared against when its hit handling method is 'directly_return'.
def add_directly_return_similarity(self, directly_return_similarity):
self.directly_return_similarity = directly_return_similarity
# Return the receiver so that add_* calls can be chained.
return self
# Builder-style setter: stores the comprehensive score on the receiver.
def add_comprehensive_score(self, comprehensive_score: float):
self.comprehensive_score = comprehensive_score
# Return the receiver so that add_* calls can be chained.
return self
@ -98,7 +104,7 @@ class ParagraphPipelineModel:
self.paragraph.get('status'),
self.paragraph.get('is_active'),
self.comprehensive_score, self.similarity, self.dataset_name,
self.document_name, self.hit_handling_method)
self.document_name, self.hit_handling_method, self.directly_return_similarity)
class IBaseChatPipelineStep:

View File

@ -138,8 +138,8 @@ class BaseChatStep(IChatStep):
if paragraph_list is None:
paragraph_list = []
directly_return_chunk_list = [AIMessageChunk(content=paragraph.content)
for paragraph in paragraph_list if
paragraph.hit_handling_method == 'directly_return']
for paragraph in paragraph_list if (
paragraph.hit_handling_method == 'directly_return' and paragraph.similarity >= paragraph.directly_return_similarity)]
if directly_return_chunk_list is not None and len(directly_return_chunk_list) > 0:
return iter(directly_return_chunk_list), False
elif len(paragraph_list) == 0 and no_references_setting.get(

View File

@ -52,6 +52,7 @@ class BaseSearchDatasetStep(ISearchDatasetStep):
.add_dataset_name(paragraph.get('dataset_name'))
.add_document_name(paragraph.get('document_name'))
.add_hit_handling_method(paragraph.get('hit_handling_method'))
.add_directly_return_similarity(paragraph.get('directly_return_similarity'))
.build())
@staticmethod
@ -81,7 +82,10 @@ class BaseSearchDatasetStep(ISearchDatasetStep):
vector.delete_by_paragraph_id(paragraph_id)
# 如果存在直接返回的则取直接返回段落
hit_handling_method_paragraph = [paragraph for paragraph in paragraph_list if
paragraph.get('hit_handling_method') == 'directly_return']
(paragraph.get(
'hit_handling_method') == 'directly_return' and BaseSearchDatasetStep.get_similarity(
paragraph, embedding_list) >= paragraph.get(
'directly_return_similarity'))]
if len(hit_handling_method_paragraph) > 0:
# 找到评分最高的
return [sorted(hit_handling_method_paragraph,

View File

@ -2,7 +2,8 @@ SELECT
paragraph.*,
dataset."name" AS "dataset_name",
"document"."name" AS "document_name",
"document"."hit_handling_method" AS "hit_handling_method"
"document"."hit_handling_method" AS "hit_handling_method",
"document"."directly_return_similarity" as "directly_return_similarity"
FROM
paragraph paragraph
LEFT JOIN dataset dataset ON dataset."id" = paragraph.dataset_id

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.13 on 2024-05-08 16:43
from django.db import migrations, models
# Schema migration for the `dataset` app: adds the `directly_return_similarity`
# float field to the `document` model.
class Migration(migrations.Migration):
# Applied after dataset migration 0003 (judging by its name, the one that
# introduced `hit_handling_method` -- confirm against that file).
dependencies = [
('dataset', '0003_document_hit_handling_method'),
]
operations = [
# New FloatField with default 0.9; per Django's AddField behaviour the
# default is also used to populate rows that already exist.
# verbose_name translates to "direct answer similarity".
migrations.AddField(
model_name='document',
name='directly_return_similarity',
field=models.FloatField(default=0.9, verbose_name='直接回答相似度'),
),
]

View File

@ -66,6 +66,7 @@ class Document(AppModelMixin):
hit_handling_method = models.CharField(verbose_name='命中处理方式', max_length=20,
choices=HitHandlingMethod.choices,
default=HitHandlingMethod.optimization)
directly_return_similarity = models.FloatField(verbose_name='直接回答相似度', default=0.9)
meta = models.JSONField(verbose_name="元数据", default=dict)

View File

@ -50,7 +50,13 @@ class DocumentEditInstanceSerializer(ApiMixin, serializers.Serializer):
code=500)
], error_messages=ErrMessage.char("命中处理方式"))
is_active = serializers.BooleanField(required=False, error_messages=ErrMessage.char(
directly_return_similarity = serializers.FloatField(required=False,
max_value=2,
min_value=0,
error_messages=ErrMessage.float(
"直接返回分数"))
is_active = serializers.BooleanField(required=False, error_messages=ErrMessage.boolean(
"文档是否可用"))
@staticmethod
@ -371,7 +377,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
_document = QuerySet(Document).get(id=self.data.get("document_id"))
if with_valid:
DocumentEditInstanceSerializer(data=instance).is_valid(document=_document)
update_keys = ['name', 'is_active', 'hit_handling_method', 'meta']
update_keys = ['name', 'is_active', 'hit_handling_method', 'directly_return_similarity', 'meta']
for update_key in update_keys:
if update_key in instance and instance.get(update_key) is not None:
_document.__setattr__(update_key, instance.get(update_key))
@ -444,6 +450,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
'is_active': openapi.Schema(type=openapi.TYPE_BOOLEAN, title="是否可用", description="是否可用"),
'hit_handling_method': openapi.Schema(type=openapi.TYPE_STRING, title="命中处理方式",
description="ai优化:optimization,直接返回:directly_return"),
'directly_return_similarity': openapi.Schema(type=openapi.TYPE_NUMBER, title="直接返回分数",
default=0.9),
'meta': openapi.Schema(type=openapi.TYPE_OBJECT, title="文档元数据",
description="文档元数据->web:{source_url:xxx,selector:'xxx'},base:{}"),
}
@ -731,7 +739,11 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
self.is_valid(raise_exception=True)
document_id_list = instance.get("id_list")
hit_handling_method = instance.get('hit_handling_method')
QuerySet(Document).filter(id__in=document_id_list).update(hit_handling_method=hit_handling_method)
directly_return_similarity = instance.get('directly_return_similarity')
update_dict = {'hit_handling_method': hit_handling_method}
if directly_return_similarity is not None:
update_dict['directly_return_similarity'] = directly_return_similarity
QuerySet(Document).filter(id__in=document_id_list).update(**update_dict)
class FileBufferHandle:

View File

@ -22,6 +22,7 @@ class DocumentApi(ApiMixin):
title="主键id列表",
description="主键id列表"),
'hit_handling_method': openapi.Schema(type=openapi.TYPE_STRING, title="命中处理方式",
description="directly_return|optimization")
description="directly_return|optimization"),
'directly_return_similarity': openapi.Schema(type=openapi.TYPE_NUMBER, title="直接返回相似度")
}
)

View File

@ -1,96 +0,0 @@
<template>
<el-dialog
title="设置"
v-model="dialogVisible"
:close-on-click-modal="false"
:close-on-press-escape="false"
:destroy-on-close="true"
width="400"
>
<el-form
label-position="top"
ref="webFormRef"
:rules="rules"
:model="form"
require-asterisk-position="right"
>
<el-form-item>
<template #label>
<div class="flex align-center">
<span class="mr-4">命中处理方式</span>
<el-tooltip
effect="dark"
content="用户提问时,命中文档下的分段时按照设置的方式进行处理。"
placement="right"
>
<AppIcon iconName="app-warning" class="app-warning-icon"></AppIcon>
</el-tooltip>
</div>
</template>
<el-radio-group v-model="form.hit_handling_method">
<template v-for="(value, key) of hitHandlingMethod" :key="key">
<el-radio :value="key">{{ value }}</el-radio>
</template>
</el-radio-group>
</el-form-item>
</el-form>
<template #footer>
<span class="dialog-footer">
<el-button @click.prevent="dialogVisible = false"> 取消 </el-button>
<el-button type="primary" @click="submit(webFormRef)" :loading="loading"> 确定 </el-button>
</span>
</template>
</el-dialog>
</template>
<script setup lang="ts">
import { ref, reactive, watch } from 'vue'
import { useRoute } from 'vue-router'
import type { FormInstance, FormRules } from 'element-plus'
import documentApi from '@/api/document'
import { MsgSuccess } from '@/utils/message'
import { hitHandlingMethod } from '../utils'
// Current route; its `id` param is passed as the first argument of the
// batch-edit API call in submit().
const route = useRoute()
const {
params: { id }
} = route as any
// `refresh` is emitted after a successful save so the parent can reload its data.
const emit = defineEmits(['refresh'])
// Template ref bound to the <el-form> (ref="webFormRef").
const webFormRef = ref()
// Handed to the API call and bound to the confirm button's `:loading`.
const loading = ref<boolean>(false)
// IDs of the documents the dialog was opened for (set in open()).
const documentList = ref<Array<string>>([])
// Form model; 'optimization' is the initially selected hit handling method.
const form = ref<any>({
hit_handling_method: 'optimization'
})
// NOTE(review): the form model has no `source_url` field and no form item in
// the template declares a matching prop, so this rule looks like a leftover
// from another dialog -- confirm before relying on it.
const rules = reactive({
source_url: [{ required: true, message: '请输入文档地址', trigger: 'blur' }]
})
// Controls the dialog's visibility (v-model on <el-dialog>).
const dialogVisible = ref<boolean>(false)
// Opens the dialog for a batch edit.
// `list` holds the IDs of the documents to update; it is stored for submit()
// and the dialog is then made visible.
const open = (list: Array<string>) => {
documentList.value = list
dialogVisible.value = true
}
/**
 * Confirm-button handler: validates the form and, if validation passes, sends
 * the selected hit handling method for all chosen documents to the batch-edit
 * endpoint. Once the request resolves it shows a success message, asks the
 * parent to refresh and closes the dialog.
 *
 * @param formEl - the form instance to validate; nothing happens when it is missing
 */
const submit = async (formEl: FormInstance | undefined) => {
  if (!formEl) {
    return
  }

  // Runs once the batch-edit request has resolved.
  const handleSaved = () => {
    MsgSuccess('设置成功')
    emit('refresh')
    dialogVisible.value = false
  }

  await formEl.validate((isValid, _invalidFields) => {
    if (!isValid) {
      return
    }
    const payload = {
      hit_handling_method: form.value.hit_handling_method,
      id_list: documentList.value
    }
    documentApi.batchEditHitHandling(id, payload, loading).then(handleSaved)
  })
}
// Expose `open` so a parent component can show this dialog through a template ref.
defineExpose({ open })
</script>
<style lang="scss" scoped></style>

View File

@ -43,11 +43,28 @@
</el-tooltip>
</div>
</template>
<el-radio-group v-model="form.hit_handling_method">
<el-radio-group v-model="form.hit_handling_method" class="radio-block mt-4">
<template v-for="(value, key) of hitHandlingMethod" :key="key">
<el-radio :value="key">{{ value }}</el-radio>
<el-radio :value="key">{{ value }} </el-radio>
</template>
</el-radio-group>
<div
v-if="form.hit_handling_method === 'directly_return'"
class="lighter"
style="margin-left: 21px"
>
<span>相似度高于</span>
<el-input-number
v-model="form.directly_return_similarity"
:min="0"
:max="1"
:precision="3"
:step="0.1"
controls-position="right"
size="small"
class="ml-4 mr-4"
/><span></span>
</div>
</el-form-item>
</el-form>
<template #footer>
@ -78,11 +95,17 @@ const isImport = ref<boolean>(false)
const form = ref<any>({
source_url: '',
selector: '',
hit_handling_method: ''
hit_handling_method: 'optimization',
directly_return_similarity: 0.9
})
// Single-document edit: ID of the document being edited (set in open() from row.id)
const documentId = ref('')
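const documentType = ref<string | number>('') // document type -- 1: web document; 0: presumably a regular (non-web) document (original comment text was lost; confirm)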
// Batch edit: IDs of the selected documents
const documentList = ref<Array<string>>([])
const rules = reactive({
source_url: [{ required: true, message: '请输入文档地址', trigger: 'blur' }]
})
@ -94,20 +117,30 @@ watch(dialogVisible, (bool) => {
form.value = {
source_url: '',
selector: '',
hit_handling_method: ''
hit_handling_method: 'optimization',
directly_return_similarity: 0.9
}
isImport.value = false
documentType.value = ''
documentList.value = []
}
})
const open = (row: any) => {
const open = (row: any, list: Array<string>) => {
if (row) {
documentType.value = row.type
documentId.value = row.id
form.value = { hit_handling_method: row.hit_handling_method, ...row.meta }
form.value = {
hit_handling_method: row.hit_handling_method,
directly_return_similarity: row.directly_return_similarity,
...row.meta
}
isImport.value = false
} else if (list) {
// Batch edit: remember the IDs of the selected documents
documentList.value = list
} else {
// Neither a row nor a list was passed: the dialog is used in import mode
isImport.value = true
}
dialogVisible.value = true
@ -128,18 +161,33 @@ const submit = async (formEl: FormInstance | undefined) => {
dialogVisible.value = false
})
} else {
const obj = {
hit_handling_method: form.value.hit_handling_method,
meta: {
source_url: form.value.source_url,
selector: form.value.selector
if (documentId.value) {
const obj = {
hit_handling_method: form.value.hit_handling_method,
directly_return_similarity: form.value.directly_return_similarity,
meta: {
source_url: form.value.source_url,
selector: form.value.selector
}
}
documentApi.putDocument(id, documentId.value, obj, loading).then((res) => {
MsgSuccess('设置成功')
emit('refresh')
dialogVisible.value = false
})
} else if (documentList.value.length > 0) {
// Batch edit: apply the settings to every selected document
const obj = {
hit_handling_method: form.value.hit_handling_method,
directly_return_similarity: form.value.directly_return_similarity,
id_list: documentList.value
}
documentApi.batchEditHitHandling(id, obj, loading).then((res: any) => {
MsgSuccess('设置成功')
emit('refresh')
dialogVisible.value = false
})
}
documentApi.putDocument(id, documentId.value, obj, loading).then((res) => {
MsgSuccess('设置成功')
emit('refresh')
dialogVisible.value = false
})
}
}
})

View File

@ -215,10 +215,6 @@
</div>
<ImportDocumentDialog ref="ImportDocumentDialogRef" :title="title" @refresh="refresh" />
<SyncWebDialog ref="SyncWebDialogRef" @refresh="refresh" />
<BatchEditDocumentDialog
ref="batchEditDocumentDialogRef"
@refresh="refresh"
></BatchEditDocumentDialog>
<!-- 选择知识库 -->
<SelectDatasetDialog ref="SelectDatasetDialogRef" @refresh="refresh" />
</div>
@ -232,7 +228,6 @@ import documentApi from '@/api/document'
import ImportDocumentDialog from './component/ImportDocumentDialog.vue'
import SyncWebDialog from '@/views/dataset/component/SyncWebDialog.vue'
import SelectDatasetDialog from './component/SelectDatasetDialog.vue'
import BatchEditDocumentDialog from './component/BatchEditDocumentDialog.vue'
import { numberFormat } from '@/utils/utils'
import { datetimeFormat } from '@/utils/time'
import { hitHandlingMethod } from './utils'
@ -265,7 +260,7 @@ onBeforeRouteLeave((to: any, from: any) => {
})
const beforePagination = computed(() => common.paginationConfig[storeKey])
const beforeSearch = computed(() => common.search[storeKey])
const batchEditDocumentDialogRef = ref<InstanceType<typeof BatchEditDocumentDialog>>()
const SyncWebDialogRef = ref()
const loading = ref(false)
let interval: any
@ -326,8 +321,9 @@ const handleSelectionChange = (val: any[]) => {
}
function openBatchEditDocument() {
title.value = '设置'
const arr: string[] = multipleSelection.value.map((v) => v.id)
batchEditDocumentDialogRef?.value?.open(arr)
ImportDocumentDialogRef.value.open(null, arr)
}
/**