Merge remote-tracking branch 'origin' into pr@main@chart-record

This commit is contained in:
wangdan-fit2cloud 2024-05-15 16:55:40 +08:00
commit c155907834
20 changed files with 203 additions and 82 deletions

View File

@ -1,7 +1,7 @@
<p align="center"><img src= "https://github.com/1Panel-dev/maxkb/assets/52996290/c0694996-0eed-40d8-b369-322bf2a380bf" alt="MaxKB" width="300" /></p>
<h3 align="center">基于 LLM 大语言模型的知识库问答系统</h3>
<p align="center">
<a href="https://www.gnu.org/licenses/old-licenses/gpl-3.0"><img src="https://img.shields.io/github/license/1Panel-dev/maxkb?color=%231890FF" alt="License: GPL v3"></a>
<a href="https://www.gnu.org/licenses/gpl-3.0.html#license-text"><img src="https://img.shields.io/github/license/1Panel-dev/maxkb?color=%231890FF" alt="License: GPL v3"></a>
<a href="https://app.codacy.com/gh/1Panel-dev/maxkb?utm_source=github.com&utm_medium=referral&utm_content=1Panel-dev/maxkb&utm_campaign=Badge_Grade_Dashboard"><img src="https://app.codacy.com/project/badge/Grade/da67574fd82b473992781d1386b937ef" alt="Codacy"></a>
<a href="https://github.com/1Panel-dev/maxkb/releases/latest"><img src="https://img.shields.io/github/v/release/1Panel-dev/maxkb" alt="Latest release"></a>
<a href="https://github.com/1Panel-dev/maxkb"><img src="https://img.shields.io/github/stars/1Panel-dev/maxkb?color=%231890FF&style=flat-square" alt="Stars"></a>

View File

@ -15,7 +15,7 @@ from drf_yasg import openapi
from rest_framework import serializers
from common.db.search import page_search
from common.event.listener_manage import ListenerManagement, UpdateEmbeddingDocumentIdArgs, UpdateEmbeddingDatasetIdArgs
from common.event.listener_manage import ListenerManagement, UpdateEmbeddingDocumentIdArgs
from common.exception.app_exception import AppApiException
from common.mixins.api_mixin import ApiMixin
from common.util.common import post
@ -284,6 +284,7 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
paragraph_id_list = instance.get("id_list")
QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete()
QuerySet(ProblemParagraphMapping).filter(paragraph_id__in=paragraph_id_list).delete()
update_document_char_length(self.data.get('document_id'))
# 删除向量库
ListenerManagement.delete_embedding_by_paragraph_ids(paragraph_id_list)
return True
@ -370,6 +371,8 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
target_document_id, target_dataset_id))
# 修改段落信息
paragraph_list.update(dataset_id=target_dataset_id, document_id=target_document_id)
update_document_char_length(document_id)
update_document_char_length(target_document_id)
@staticmethod
def update_problem_paragraph_mapping(target_document_id: str, problem_paragraph_mapping):
@ -527,6 +530,7 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
paragraph_id = self.data.get('paragraph_id')
QuerySet(Paragraph).filter(id=paragraph_id).delete()
QuerySet(ProblemParagraphMapping).filter(paragraph_id=paragraph_id).delete()
update_document_char_length(self.data.get('document_id'))
ListenerManagement.delete_embedding_by_paragraph_signal.send(paragraph_id)
@staticmethod

View File

@ -17,6 +17,7 @@ from setting.models_provider.impl.kimi_model_provider.kimi_model_provider import
from setting.models_provider.impl.xf_model_provider.xf_model_provider import XunFeiModelProvider
from setting.models_provider.impl.zhipu_model_provider.zhipu_model_provider import ZhiPuModelProvider
from setting.models_provider.impl.deepseek_model_provider.deepseek_model_provider import DeepSeekModelProvider
from setting.models_provider.impl.gemini_model_provider.gemini_model_provider import GeminiModelProvider
class ModelProvideConstants(Enum):
@ -29,3 +30,4 @@ class ModelProvideConstants(Enum):
model_zhipu_provider = ZhiPuModelProvider()
model_xf_provider = XunFeiModelProvider()
model_deepseek_provider = DeepSeekModelProvider()
model_gemini_provider = GeminiModelProvider()

View File

@ -21,43 +21,6 @@ from setting.models_provider.base_model_provider import IModelProvider, ModelPro
from setting.models_provider.impl.azure_model_provider.model.azure_chat_model import AzureChatModel
from smartdoc.conf import PROJECT_DIR
"""
class AzureLLMModelCredential(BaseForm, BaseModelCredential):
def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], raise_exception=False):
model_type_list = AzureModelProvider().get_model_type_list()
if not any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))):
raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
for key in ['api_base', 'api_key', 'deployment_name']:
if key not in model_credential:
if raise_exception:
raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段')
else:
return False
try:
model = AzureModelProvider().get_model(model_type, model_name, model_credential)
model.invoke([HumanMessage(content='你好')])
except Exception as e:
if isinstance(e, AppApiException):
raise e
if raise_exception:
raise AppApiException(ValidCode.valid_error.value, '校验失败,请检查参数是否正确')
else:
return False
return True
def encryption_dict(self, model: Dict[str, object]):
return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
api_base = forms.TextInputField('API 版本 (api_version)', required=True)
api_key = forms.PasswordInputField("API KeyAPI 密钥)", required=True)
deployment_name = forms.TextInputField("部署名deployment_name", required=True)
"""
class DefaultAzureLLMModelCredential(BaseForm, BaseModelCredential):
@ -97,8 +60,6 @@ class DefaultAzureLLMModelCredential(BaseForm, BaseModelCredential):
deployment_name = forms.TextInputField("部署名 (deployment_name)", required=True)
# azure_llm_model_credential: AzureLLMModelCredential = AzureLLMModelCredential()
base_azure_llm_model_credential = DefaultAzureLLMModelCredential()
model_dict = {
@ -114,7 +75,6 @@ class AzureModelProvider(IModelProvider):
return 3
def get_model(self, model_type, model_name, model_credential: Dict[str, object], **model_kwargs) -> AzureChatModel:
model_info: ModelInfo = model_dict.get(model_name)
azure_chat_open_ai = AzureChatModel(
azure_endpoint=model_credential.get('api_base'),
openai_api_version=model_credential.get('api_version', '2024-02-15-preview'),

View File

@ -16,9 +16,15 @@ from common.config.tokenizer_manage_config import TokenizerManage
class AzureChatModel(AzureChatOpenAI):
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
tokenizer = TokenizerManage.get_tokenizer()
return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages])
try:
return super().get_num_tokens_from_messages(messages)
except Exception as e:
tokenizer = TokenizerManage.get_tokenizer()
return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages])
def get_num_tokens(self, text: str) -> int:
tokenizer = TokenizerManage.get_tokenizer()
return len(tokenizer.encode(text))
try:
return super().get_num_tokens(text)
except Exception as e:
tokenizer = TokenizerManage.get_tokenizer()
return len(tokenizer.encode(text))

View File

@ -0,0 +1,8 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Project MaxKB
@File __init__.py
@Author Brian Yang
@Date 5/13/24 7:40 AM
"""

View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Project MaxKB
@File gemini_model_provider.py
@Author Brian Yang
@Date 5/13/24 7:47 AM
"""
import os
from typing import Dict
from langchain.schema import HumanMessage
from common import forms
from common.exception.app_exception import AppApiException
from common.forms import BaseForm
from common.util.file_util import get_file_content
from setting.models_provider.base_model_provider import IModelProvider, ModelProvideInfo, BaseModelCredential, \
ModelInfo, ModelTypeConst, ValidCode
from setting.models_provider.impl.gemini_model_provider.model.gemini_chat_model import GeminiChatModel
from smartdoc.conf import PROJECT_DIR
class GeminiLLMModelCredential(BaseForm, BaseModelCredential):
    """Credential form and validator for Gemini LLM models (API key only)."""

    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], raise_exception=False):
        """Validate a credential dict by building the model and sending one test message.

        :param model_type: model type value; must match a 'value' in the provider's type list
        :param model_name: model name forwarded to the provider's get_model
        :param model_credential: credential mapping; must contain 'api_key'
        :param raise_exception: when True, failures raise AppApiException instead of returning False
        :return: True when the test invocation succeeds, otherwise False
        :raises AppApiException: for an unsupported model type (always), and for a missing
            field or a failed test call when raise_exception is True
        """
        provider = GeminiModelProvider()
        matching_types = [mt for mt in provider.get_model_type_list() if mt.get('value') == model_type]
        if not any(matching_types):
            # An unsupported type is reported regardless of raise_exception.
            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')

        for required_key in ['api_key']:
            if required_key in model_credential:
                continue
            if raise_exception:
                raise AppApiException(ValidCode.valid_error.value, f'{required_key} 字段为必填字段')
            return False

        try:
            chat_model = GeminiModelProvider().get_model(model_type, model_name, model_credential)
            # Send one short message to confirm the credential actually works.
            chat_model.invoke([HumanMessage(content='你好')])
        except AppApiException:
            # Application-level errors pass through unchanged.
            raise
        except Exception as err:
            if raise_exception:
                raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(err)}')
            return False
        return True

    def encryption_dict(self, model: Dict[str, object]):
        """Return a copy of ``model`` whose 'api_key' has been passed through ``encryption``."""
        protected_key = super().encryption(model.get('api_key', ''))
        return {**model, 'api_key': protected_key}

    # The single form field exposed for this credential.
    api_key = forms.PasswordInputField('API Key', required=True)
# Shared credential instance used by every Gemini model entry below.
gemini_llm_model_credential = GeminiLLMModelCredential()

# (model name, description) pairs for the built-in Gemini models.
_GEMINI_MODEL_DESCRIPTIONS = (
    ('gemini-1.0-pro', '最新的Gemini 1.0 Pro模型随Google更新而更新'),
    ('gemini-1.0-pro-vision', '最新的Gemini 1.0 Pro Vision模型随Google更新而更新'),
)

# Registry of built-in Gemini models keyed by model name; all are LLM type.
model_dict = {
    model_name: ModelInfo(model_name, description, ModelTypeConst.LLM, gemini_llm_model_credential)
    for model_name, description in _GEMINI_MODEL_DESCRIPTIONS
}
class GeminiModelProvider(IModelProvider):
    """Model provider that builds Gemini chat models and describes the built-in model list."""

    def get_dialogue_number(self):
        """Return the dialogue number for this provider (fixed at 3)."""
        return 3

    def get_model(self, model_type, model_name, model_credential: Dict[str, object],
                  **model_kwargs) -> GeminiChatModel:
        """Build a GeminiChatModel for ``model_name`` using the credential's 'api_key'.

        ``model_type`` and ``model_kwargs`` are accepted but not used here.
        """
        api_key = model_credential.get('api_key')
        return GeminiChatModel(model=model_name, google_api_key=api_key)

    def get_model_credential(self, model_type, model_name):
        """Return the credential of a registered model, or the shared default for unknown names."""
        if model_name not in model_dict:
            return gemini_llm_model_credential
        return model_dict[model_name].model_credential

    def get_model_provide_info(self):
        """Return provider metadata (provider key, display name, icon file content)."""
        icon_path = os.path.join(PROJECT_DIR, "apps", "setting", 'models_provider', 'impl',
                                 'gemini_model_provider', 'icon', 'gemini_icon_svg')
        return ModelProvideInfo(provider='model_gemini_provider', name='Gemini',
                                icon=get_file_content(icon_path))

    def get_model_list(self, model_type: str):
        """Return the dict form of every registered model whose type equals ``model_type``.

        :raises AppApiException: when ``model_type`` is None
        """
        if model_type is None:
            raise AppApiException(500, '模型类型不能为空')
        return [info.to_dict() for info in model_dict.values() if info.model_type == model_type]

    def get_model_type_list(self):
        """Return the model types this provider supports (LLM only)."""
        return [{'key': "大语言模型", 'value': "LLM"}]

View File

@ -0,0 +1,10 @@
<svg width="100%" height="100%" viewBox="0 0 28 28" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M14 28C14 26.0633 13.6267 24.2433 12.88 22.54C12.1567 20.8367 11.165 19.355 9.905 18.095C8.645 16.835 7.16333 15.8433 5.46 15.12C3.75667 14.3733 1.93667 14 0 14C1.93667 14 3.75667 13.6383 5.46 12.915C7.16333 12.1683 8.645 11.165 9.905 9.905C11.165 8.645 12.1567 7.16333 12.88 5.46C13.6267 3.75667 14 1.93667 14 0C14 1.93667 14.3617 3.75667 15.085 5.46C15.8317 7.16333 16.835 8.645 18.095 9.905C19.355 11.165 20.8367 12.1683 22.54 12.915C24.2433 13.6383 26.0633 14 28 14C26.0633 14 24.2433 14.3733 22.54 15.12C20.8367 15.8433 19.355 16.835 18.095 18.095C16.835 19.355 15.8317 20.8367 15.085 22.54C14.3617 24.2433 14 26.0633 14 28Z" fill="url(#paint0_radial_16771_53212)"/>
<defs>
<radialGradient id="paint0_radial_16771_53212" cx="0" cy="0" r="1" gradientUnits="userSpaceOnUse" gradientTransform="translate(2.77876 11.3795) rotate(18.6832) scale(29.8025 238.737)">
<stop offset="0.0671246" stop-color="#9168C0"/>
<stop offset="0.342551" stop-color="#5684D1"/>
<stop offset="0.672076" stop-color="#1BA1E3"/>
</radialGradient>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Project MaxKB
@File gemini_chat_model.py
@Author Brian Yang
@Date 5/13/24 7:40 AM
"""
from typing import List
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_google_genai import ChatGoogleGenerativeAI
from common.config.tokenizer_manage_config import TokenizerManage
class GeminiChatModel(ChatGoogleGenerativeAI):
    """ChatGoogleGenerativeAI subclass whose token counting falls back to the shared tokenizer."""

    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
        """Count tokens for ``messages``; use the shared tokenizer if the parent call raises."""
        try:
            return super().get_num_tokens_from_messages(messages)
        except Exception:
            # Fallback: encode each message's buffer string and sum the lengths.
            fallback_tokenizer = TokenizerManage.get_tokenizer()
            return sum(len(fallback_tokenizer.encode(get_buffer_string([message]))) for message in messages)

    def get_num_tokens(self, text: str) -> int:
        """Count tokens in ``text``; use the shared tokenizer if the parent call raises."""
        try:
            return super().get_num_tokens(text)
        except Exception:
            fallback_tokenizer = TokenizerManage.get_tokenizer()
            encoded = fallback_tokenizer.encode(text)
            return len(encoded)

View File

@ -35,7 +35,7 @@ class OllamaLLMModelCredential(BaseForm, BaseModelCredential):
model_list = OllamaModelProvider.get_base_model_list(model_credential.get('api_base'))
except Exception as e:
raise AppApiException(ValidCode.valid_error.value, "API 域名无效")
exist = [model for model in model_list.get('models') if
exist = [model for model in (model_list.get('models') if model_list.get('models') is not None else []) if
model.get('model') == model_name or model.get('model').replace(":latest", "") == model_name]
if len(exist) == 0:
raise AppApiException(ValidCode.model_not_fount, "模型不存在,请先下载模型")

View File

@ -60,6 +60,17 @@ model_dict = {
'gpt-3.5-turbo': ModelInfo('gpt-3.5-turbo', '最新的gpt-3.5-turbo随OpenAI调整而更新', ModelTypeConst.LLM,
openai_llm_model_credential,
),
'gpt-4': ModelInfo('gpt-4', '最新的gpt-4随OpenAI调整而更新', ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-4o': ModelInfo('gpt-4o', '最新的GPT-4o比gpt-4-turbo更便宜、更快随OpenAI调整而更新',
ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-4-turbo': ModelInfo('gpt-4-turbo', '最新的gpt-4-turbo随OpenAI调整而更新', ModelTypeConst.LLM,
openai_llm_model_credential,
),
'gpt-4-turbo-preview': ModelInfo('gpt-4-turbo-preview', '最新的gpt-4-turbo-preview随OpenAI调整而更新',
ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-3.5-turbo-0125': ModelInfo('gpt-3.5-turbo-0125',
'2024年1月25日的gpt-3.5-turbo快照支持上下文长度16,385 tokens', ModelTypeConst.LLM,
openai_llm_model_credential,
@ -72,14 +83,10 @@ model_dict = {
'[Legacy] 2023年6月13日的gpt-3.5-turbo快照将于2024年6月13日弃用',
ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-4': ModelInfo('gpt-4', '最新的gpt-4随OpenAI调整而更新', ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-4-turbo': ModelInfo('gpt-4-turbo', '最新的gpt-4-turbo随OpenAI调整而更新', ModelTypeConst.LLM,
openai_llm_model_credential,
),
'gpt-4-turbo-preview': ModelInfo('gpt-4-turbo-preview', '最新的gpt-4-turbo-preview随OpenAI调整而更新',
ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-4o-2024-05-13': ModelInfo('gpt-4o-2024-05-13',
'2024年5月13日的gpt-4o快照支持上下文长度128,000 tokens',
ModelTypeConst.LLM, openai_llm_model_credential,
),
'gpt-4-turbo-2024-04-09': ModelInfo('gpt-4-turbo-2024-04-09',
'2024年4月9日的gpt-4-turbo快照支持上下文长度128,000 tokens',
ModelTypeConst.LLM, openai_llm_model_credential,

View File

@ -37,6 +37,7 @@ zhipuai = "^2.0.1"
httpx = "^0.27.0"
httpx-sse = "^0.4.0"
websocket-client = "^1.7.0"
langchain-google-genai = "^1.0.3"
[build-system]
requires = ["poetry-core"]

View File

@ -31,7 +31,7 @@
"markdown-it-sup": "^1.0.0",
"markdown-it-task-lists": "^2.1.1",
"markdown-it-toc-done-right": "^4.2.0",
"md-editor-v3": "^4.12.1",
"md-editor-v3": "4.12.1",
"medium-zoom": "^1.1.0",
"mermaid": "^10.9.0",
"mitt": "^3.0.0",

View File

@ -135,6 +135,7 @@ onMounted(() => {
overflow: hidden;
position: relative;
}
&__footer {
background: #f3f7f9;
height: 80px;

View File

@ -95,9 +95,9 @@ function delProblemHandle(item: any, index: number) {
detail.value.problem_list.splice(index, 1)
}
function addProblemHandle() {
if (problemValue.value) {
if (problemValue.value.trim()) {
detail.value?.problem_list?.push({
content: problemValue.value
content: problemValue.value.trim()
})
problemValue.value = ''
isAddProblem.value = false

View File

@ -136,10 +136,10 @@ function changeHandle(val: boolean) {
const list = paragraphList.value
list.map((item: any) => {
item.content.map((v: any) => {
v['problem_list'] = v.title
v['problem_list'] = v.title.trim()
? [
{
content: v.title
content: v.title.trim()
}
]
: []
@ -173,17 +173,17 @@ function splitDocument() {
if (checkedConnect.value) {
list.map((item: any) => {
item.content.map((v: any) => {
v['problem_list'] = v.title
v['problem_list'] = v.title.trim()
? [
{
content: v.title
content: v.title.trim()
}
]
: []
})
})
}
paragraphList.value = res.data
paragraphList.value = list
loading.value = false
})
.catch(() => {

View File

@ -21,7 +21,11 @@
type="textarea"
/>
</el-form-item>
<el-form-item v-else-if="documentType === '1'" label="文档地址" prop="source_url">
<el-form-item
v-else-if="!isImport && documentType === '1'"
label="文档地址"
prop="source_url"
>
<el-input v-model="form.source_url" placeholder="请输入文档地址" />
</el-form-item>
<el-form-item label="选择器" v-if="documentType === '1'">
@ -124,6 +128,7 @@ watch(dialogVisible, (bool) => {
}
isImport.value = false
documentType.value = ''
documentId.value = ''
documentList.value = []
}
})
@ -142,7 +147,8 @@ const open = (row: any, list: Array<string>) => {
//
documentList.value = list
} else {
//
// web
documentType.value = '1'
isImport.value = true
}
dialogVisible.value = true
@ -181,7 +187,7 @@ const submit = async (formEl: FormInstance | undefined) => {
//
const obj = {
hit_handling_method: form.value.hit_handling_method,
directly_return_similarity: form.value.directly_return_similarity,
directly_return_similarity: form.value.directly_return_similarity || 0.9,
id_list: documentList.value
}
documentApi.batchEditHitHandling(id, obj, loading).then((res: any) => {

View File

@ -134,7 +134,6 @@ import type { FormField } from '@/components/dynamics-form/type'
import DynamicsForm from '@/components/dynamics-form/index.vue'
import type { FormRules } from 'element-plus'
import { MsgSuccess } from '@/utils/message'
import { QuestionFilled } from '@element-plus/icons-vue'
const providerValue = ref<Provider>()
const dynamicsFormRef = ref<InstanceType<typeof DynamicsForm>>()
@ -214,6 +213,7 @@ const list_base_model = (model_type: any) => {
// Reset the dialog's form state and hide the dialog.
const close = () => {
// Restore the base form to its empty defaults.
base_form_data.value = { name: '', model_type: '', model_name: '' }
// Discard any credential values that were entered.
credential_form_data.value = {}
// Clear the dynamic form field definitions.
model_form_field.value = []
dialogVisible.value = false
}
const submit = () => {

View File

@ -128,7 +128,6 @@ import type { FormField } from '@/components/dynamics-form/type'
import DynamicsForm from '@/components/dynamics-form/index.vue'
import type { FormRules } from 'element-plus'
import { MsgSuccess } from '@/utils/message'
import { QuestionFilled } from '@element-plus/icons-vue'
import AppIcon from '@/components/icons/AppIcon.vue'
const providerValue = ref<Provider>()
@ -218,6 +217,7 @@ const close = () => {
base_form_data.value = { name: '', model_type: '', model_name: '' }
dynamicsFormRef.value?.ruleFormRef?.resetFields()
credential_form_data.value = {}
model_form_field.value = []
dialogVisible.value = false
}

View File

@ -70,7 +70,6 @@
ref="createModelRef"
@submit="list_model"
@change="openCreateModel($event)"
:key="dialogState.createModelDialogKey"
></CreateModelDialog>
<SelectProviderDialog
@ -81,8 +80,7 @@
</template>
<script lang="ts" setup>
import { ElMessage } from 'element-plus'
import { onMounted, ref, computed, reactive } from 'vue'
import { onMounted, ref, computed } from 'vue'
import ModelApi from '@/api/model'
import type { Provider, Model } from '@/api/type/model'
import AppIcon from '@/components/icons/AppIcon.vue'
@ -129,7 +127,6 @@ const openCreateModel = (provider?: Provider) => {
createModelRef.value?.open(provider)
} else {
selectProviderRef.value?.open()
refreshCreateModelDialogKey() // key
}
}
@ -140,16 +137,6 @@ const list_model = () => {
})
}
// statedialogkey
const dialogState = reactive({
createModelDialogKey: Date.now() //
})
// dialogState.createModelDialogKey
const refreshCreateModelDialogKey = () => {
dialogState.createModelDialogKey = Date.now() //
}
onMounted(() => {
ModelApi.getProvider(loading).then((ok) => {
active_provider.value = allObj