diff --git a/README.md b/README.md index 372803d56..24212dc1a 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,14 @@ -[English](README_EN.md) | [中文](README.md) -

MaxKB

-

基于大语言模型和 RAG 的知识库问答系统

+

基于大模型和 RAG 的知识库问答系统

1Panel-dev%2FMaxKB | Trendshift 1Panel-dev%2FMaxKB | Aliyun

- License: GPL v3 - Codacy + English README + License: GPL v3 Latest release - Stars + Stars Download


@@ -22,7 +20,7 @@ MaxKB = Max Knowledge Base,是一款基于大语言模型和 RAG 的开源知 - **灵活编排**:内置强大的工作流引擎和函数库,支持编排 AI 工作过程,满足复杂业务场景下的需求; - **无缝嵌入**:支持零编码快速嵌入到第三方业务系统,让已有系统快速拥有智能问答能力,提高用户满意度。 -三分钟视频介绍:https://www.bilibili.com/video/BV18JypYeEkj/ +MaxKB 三分钟视频介绍:https://www.bilibili.com/video/BV18JypYeEkj/ ## 快速开始 @@ -52,7 +50,7 @@ docker run -d --name=maxkb --restart=always -p 8080:8080 -v C:/maxkb:/var/lib/po ## 案例展示 -MaxKB 自发布以来,日均安装下载超过 1000 次,被广泛应用于智能客服、企业内部知识库、学术研究与教育等场景。 +MaxKB 自发布以来,日均安装下载超过 1000 次,被广泛应用于智能客服、企业内部知识库、学术教育研究等场景。 - [华莱士智能客服](https://ai.cnhls.com/ui/chat/1fc0f6a9b5a6fb27) - [JumpServer 小助手](https://maxkb.fit2cloud.com/ui/chat/b4e27a6e72d349a3) @@ -78,7 +76,6 @@ MaxKB 自发布以来,日均安装下载超过 1000 次,被广泛应用于 - 后端:[Python / Django](https://www.djangoproject.com/) - LangChain:[LangChain](https://www.langchain.com/) - 向量数据库:[PostgreSQL / pgvector](https://www.postgresql.org/) -- 大模型:各种本地私有或者公共大模型 ## 飞致云的其他明星项目 diff --git a/README_EN.md b/README_EN.md index 8722c5cc1..3ac202629 100644 --- a/README_EN.md +++ b/README_EN.md @@ -1,5 +1,5 @@

MaxKB

-

Knowledge base, question answering system, based on LLM large language models

+

Top-Rated Retrieval-Augmented Generation (RAG) Chatbot.

1Panel-dev%2FMaxKB | Trendshift

License: GPL v3 @@ -10,12 +10,13 @@


-MaxKB = Max Knowledge Base,It is an open source knowledge base question and answer system based on the LLM large language model. It is widely used in enterprise internal knowledge bases, customer services, academic research and education and other scenarios. +MaxKB = Max Knowledge Base. It is a chatbot based on Large Language Models (LLM) and Retrieval-Augmented Generation (RAG). MaxKB is widely applied in scenarios such as intelligent customer service, corporate internal knowledge bases, academic research, and education. + +- **Ready-to-Use**: Supports direct uploading of documents / automatic crawling of online documents, with features for automatic text splitting, vectorization, and RAG (Retrieval-Augmented Generation). This effectively reduces hallucinations in large models, providing a superior smart Q&A interaction experience. +- **Model-Agnostic**: Supports various large models, including private models (such as Llama 3, Qwen 2, etc.) and public models (like OpenAI, Claude, Gemini, etc.). +- **Flexible Orchestration**: Equipped with a powerful workflow engine and function library, enabling the orchestration of AI processes to meet the needs of complex business scenarios. +- **Seamless Integration**: Facilitates zero-coding rapid integration into third-party business systems, quickly equipping existing systems with intelligent Q&A capabilities to enhance user satisfaction. -- **Out-of-the-box**: Supports direct uploading of documents, automatic crawling of online documents, automatic text splitting, vectorization, RAG (retrieval enhancement generation), and a good interactive experience in intelligent question and answer; -- **Model neutral**: Supports docking with various large language models, including local private large models (Llama 3/Qwen 2, etc.), domestic public large models (Tongyi Qianwen/Zhipu AI/Baidu Qianfan/Kimi/DeepSeek, etc.) 
and foreign public models Large models (OpenAI / Azure OpenAI / Gemini, etc.); -- **Flexible Orchestration**: Built-in powerful workflow engine supports the orchestration of AI work processes to meet the needs of complex business scenarios; -- **Seamless Embedding**: Supports rapid embedding into third-party business systems with zero coding, allowing existing systems to quickly have intelligent question and answer capabilities and improve user satisfaction ## Quick start ``` @@ -25,20 +26,7 @@ docker run -d --name=maxkb --restart=always -p 8080:8080 -v ~/.maxkb:/var/lib/po # pass: MaxKB@123.. ``` -- You can also quickly deploy MaxKB + Ollama + Llama 3 through [1Panel App Store](https://apps.fit2cloud.com/1panel). A knowledge base question and answer system based on a local large model can be launched within 30 minutes and embedded into In third-party business systems; -- If it is an intranet environment, it is recommended to use [offline installation package](https://community.fit2cloud.com/#/products/maxkb/downloads) for installation and deployment; -- You can also experience it online: [DataEase Assistant](https://dataease.io/docs/v2/), which is an intelligent question and answer system based on MaxKB and has been embedded in DataEase products and online documents.; -- MaxKB's product version is divided into community version and professional version. For details, please see: [MaxKB product version comparison](https://maxkb.cn/pricing.html). - -If you have more questions, you can check the user manual or communicate with us through the forum. If you need to build a technical blog or knowledge base, it is recommended to use [Halo open source website building tool](https://github.com/halo-dev/halo/). You can experience Feizhiyun’s official [Technical Blog](https://blog.fit2cloud.com/) and [Knowledge Base](https://kb.fit2cloud.com) cases. 
-- [Docs](https://maxkb.cn/docs/) -- [Demo Vid](https://www.bilibili.com/video/BV1BE421M7YM/) -- [Forum](https://bbs.fit2cloud.com/c/mk/11) -- Technical exchange group - - - -## UI Screenshots +## Screenshots @@ -51,26 +39,15 @@ If you have more questions, you can check the user manual or communicate with us
-## Stack Used +## Technical Stack -- Frontend:[Vue.js](https://cn.vuejs.org/) +- Frontend:[Vue.js](https://vuejs.org/) - Backend:[Python / Django](https://www.djangoproject.com/) - LangChain:[LangChain](https://www.langchain.com/) - Vector DB:[PostgreSQL / pgvector](https://www.postgresql.org/) -- Large models: various local private or public large models - -## Other Projects From Feizhiyun - -- [1Panel](https://github.com/1panel-dev/1panel/) - Modern, open source Linux server operation and maintenance management panel -- [JumpServer](https://github.com/jumpserver/jumpserver/) - Popular open source bastion host -- [DataEase](https://github.com/dataease/dataease/) - Open source data visualization analysis tools available to everyone -- [MeterSphere](https://github.com/metersphere/metersphere/) - New generation of open-source test tools -- [Halo](https://github.com/halo-dev/halo/) - Powerful and easy-to-use open source website building tool ## License -Copyright (c) 2014-2024 Feizhiyun FIT2CLOUD, All rights reserved. - Licensed under The GNU General Public License version 3 (GPLv3) (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at diff --git a/apps/application/flow/step_node/application_node/impl/base_application_node.py b/apps/application/flow/step_node/application_node/impl/base_application_node.py index 5527cf489..17b30d0eb 100644 --- a/apps/application/flow/step_node/application_node/impl/base_application_node.py +++ b/apps/application/flow/step_node/application_node/impl/base_application_node.py @@ -52,6 +52,7 @@ def write_context_stream(node_variable: Dict, workflow_variable: Dict, node: INo runtime_node_id = response_content.get('runtime_node_id', '') chat_record_id = response_content.get('chat_record_id', '') child_node = response_content.get('child_node') + view_type = response_content.get('view_type') node_type = response_content.get('node_type') real_node_id = response_content.get('real_node_id') node_is_end = response_content.get('node_is_end', False) @@ -65,7 +66,8 @@ def write_context_stream(node_variable: Dict, workflow_variable: Dict, node: INo 'runtime_node_id': runtime_node_id, 'chat_record_id': chat_record_id, 'child_node': child_node, 'real_node_id': real_node_id, - 'node_is_end': node_is_end} + 'node_is_end': node_is_end, + 'view_type': view_type} usage = response_content.get('usage', {}) node_variable['result'] = {'usage': usage} node_variable['is_interrupt_exec'] = is_interrupt_exec diff --git a/apps/application/flow/step_node/function_lib_node/impl/base_function_lib_node.py b/apps/application/flow/step_node/function_lib_node/impl/base_function_lib_node.py index 273b84d97..f4c5b53ee 100644 --- a/apps/application/flow/step_node/function_lib_node/impl/base_function_lib_node.py +++ b/apps/application/flow/step_node/function_lib_node/impl/base_function_lib_node.py @@ -29,7 +29,7 @@ def write_context(step_variable: Dict, global_variable: Dict, node, workflow): if workflow.is_result(node, NodeResult(step_variable, global_variable)) and 'result' in step_variable: result = str(step_variable['result']) + '\n' yield result - 
workflow.answer += result + node.answer_text = result node.context['run_time'] = time.time() - node.context['start_time'] @@ -94,6 +94,7 @@ class BaseFunctionLibNodeNode(IFunctionLibNode): def save_context(self, details, workflow_manage): self.context['result'] = details.get('result') self.answer_text = details.get('result') + def execute(self, function_lib_id, input_field_list, **kwargs) -> NodeResult: function_lib = QuerySet(FunctionLib).filter(id=function_lib_id).first() if not function_lib.is_active: diff --git a/apps/application/flow/step_node/function_node/impl/base_function_node.py b/apps/application/flow/step_node/function_node/impl/base_function_node.py index 3336b308a..e9aac69dc 100644 --- a/apps/application/flow/step_node/function_node/impl/base_function_node.py +++ b/apps/application/flow/step_node/function_node/impl/base_function_node.py @@ -27,7 +27,7 @@ def write_context(step_variable: Dict, global_variable: Dict, node, workflow): if workflow.is_result(node, NodeResult(step_variable, global_variable)) and 'result' in step_variable: result = str(step_variable['result']) + '\n' yield result - workflow.answer += result + node.answer_text = result node.context['run_time'] = time.time() - node.context['start_time'] diff --git a/apps/application/flow/workflow_manage.py b/apps/application/flow/workflow_manage.py index b126387d5..e580a5a6a 100644 --- a/apps/application/flow/workflow_manage.py +++ b/apps/application/flow/workflow_manage.py @@ -454,6 +454,7 @@ class WorkflowManage: content = r child_node = {} node_is_end = False + view_type = current_node.view_type if isinstance(r, dict): content = r.get('content') child_node = {'runtime_node_id': r.get('runtime_node_id'), @@ -461,6 +462,7 @@ class WorkflowManage: , 'child_node': r.get('child_node')} real_node_id = r.get('real_node_id') node_is_end = r.get('node_is_end') + view_type = r.get('view_type') chunk = self.base_to_response.to_stream_chunk_response(self.params['chat_id'], 
self.params['chat_record_id'], current_node.id, @@ -468,7 +470,7 @@ class WorkflowManage: content, False, 0, 0, {'node_type': current_node.type, 'runtime_node_id': current_node.runtime_node_id, - 'view_type': current_node.view_type, + 'view_type': view_type, 'child_node': child_node, 'node_is_end': node_is_end, 'real_node_id': real_node_id}) diff --git a/apps/application/serializers/chat_message_serializers.py b/apps/application/serializers/chat_message_serializers.py index c6374c914..84e8376e0 100644 --- a/apps/application/serializers/chat_message_serializers.py +++ b/apps/application/serializers/chat_message_serializers.py @@ -341,10 +341,12 @@ class ChatMessageSerializer(serializers.Serializer): user_id = chat_info.application.user_id chat_record_id = self.data.get('chat_record_id') chat_record = None + history_chat_record = chat_info.chat_record_list if chat_record_id is not None: chat_record = self.get_chat_record(chat_info, chat_record_id) + history_chat_record = [r for r in chat_info.chat_record_list if str(r.id) != chat_record_id] work_flow_manage = WorkflowManage(Flow.new_instance(chat_info.work_flow_version.work_flow), - {'history_chat_record': chat_info.chat_record_list, 'question': message, + {'history_chat_record': history_chat_record, 'question': message, 'chat_id': chat_info.chat_id, 'chat_record_id': str( uuid.uuid1()) if chat_record is None else chat_record.id, 'stream': stream, diff --git a/apps/application/template/embed.js b/apps/application/template/embed.js index ec85ed540..a6d19b605 100644 --- a/apps/application/template/embed.js +++ b/apps/application/template/embed.js @@ -272,6 +272,7 @@ function initMaxkbStyle(root){ position: absolute; display: flex; align-items: center; + line-height: 18px; } #maxkb #maxkb-chat-container .maxkb-operate .maxkb-chat-close{ margin-left:15px; diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index 677251eae..80cfdccbe 100644 --- 
a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -143,6 +143,18 @@ class DocumentWebInstanceSerializer(ApiMixin, serializers.Serializer): required=True, description='知识库id'), ] + @staticmethod + def get_request_body_api(): + return openapi.Schema( + type=openapi.TYPE_OBJECT, + required=['source_url_list'], + properties={ + 'source_url_list': openapi.Schema(type=openapi.TYPE_ARRAY, title="文档地址列表", description="文档地址列表", + items=openapi.Schema(type=openapi.TYPE_STRING)), + 'selector': openapi.Schema(type=openapi.TYPE_STRING, title="选择器", description="选择器") + } + ) + class DocumentInstanceSerializer(ApiMixin, serializers.Serializer): diff --git a/apps/dataset/views/document.py b/apps/dataset/views/document.py index 4a98fb08b..87e0b886a 100644 --- a/apps/dataset/views/document.py +++ b/apps/dataset/views/document.py @@ -236,7 +236,7 @@ class Document(APIView): return result.success( DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).cancel( request.data - )) + )) class Refresh(APIView): authentication_classes = [TokenAuth] @@ -309,7 +309,7 @@ class Document(APIView): manual_parameters=DocumentSerializers.Operate.get_request_params_api(), tags=["知识库/文档"]) @has_permissions( - lambda r, k: Permission(group=Group.DATASET, operate=Operate.USE, + lambda r, k: Permission(group=Group.DATASET, operate=Operate.MANAGE, dynamic_tag=k.get('dataset_id'))) def get(self, request: Request, dataset_id: str, document_id: str): return DocumentSerializers.Operate(data={'document_id': document_id, 'dataset_id': dataset_id}).export() diff --git a/ui/src/api/type/application.ts b/ui/src/api/type/application.ts index 95812bbab..c1d07c968 100644 --- a/ui/src/api/type/application.ts +++ b/ui/src/api/type/application.ts @@ -251,9 +251,9 @@ export class ChatRecordManage { (node_info.divider_content ? 
node_info.divider_content.splice(0).join('') : '') + node_info.current_node.buffer.splice(0).join(''), node_info.answer_text_list_index, - current_node.chat_record_id, - current_node.runtime_node_id, - current_node.child_node + node_info.current_node.chat_record_id, + node_info.current_node.runtime_node_id, + node_info.current_node.child_node ) if (node_info.current_node.buffer.length == 0) { node_info.current_node.is_end = true diff --git a/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue b/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue index f103e48e2..c9c8bc5b6 100644 --- a/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue +++ b/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue @@ -154,9 +154,7 @@ function markdownToPlainText(md: string) { function removeFormRander(text: string) { return text - .replace('你好,请先填写下面表单内容:', '') - .replace(/[\s\S]*?<\/formrander>/, '') - .replace('填写后请点击【提交】按钮进行提交。', '') + .replace(/[\s\S]*?<\/form_rander>/g, '') .trim() } @@ -164,10 +162,11 @@ const playAnswerText = (text: string) => { if (!text) { text = '抱歉,没有查找到相关内容,请重新描述您的问题或提供更多信息。' } - // text 处理成纯文本 - text = markdownToPlainText(text) // 移除表单渲染器 text = removeFormRander(text) + // text 处理成纯文本 + text = markdownToPlainText(text) + // console.log(text) audioPlayerStatus.value = true if (props.tts_type === 'BROWSER') { if (text !== utterance.value?.text) { diff --git a/ui/src/components/ai-chat/component/operation-button/LogOperationButton.vue b/ui/src/components/ai-chat/component/operation-button/LogOperationButton.vue index 8057dae31..77d8470f0 100644 --- a/ui/src/components/ai-chat/component/operation-button/LogOperationButton.vue +++ b/ui/src/components/ai-chat/component/operation-button/LogOperationButton.vue @@ -133,9 +133,7 @@ function markdownToPlainText(md: string) { function removeFormRander(text: string) { return text - .replace('你好,请先填写下面表单内容:', '') - 
.replace(/[\s\S]*?<\/formrander>/, '') - .replace('填写后请点击【提交】按钮进行提交。', '') + .replace(/[\s\S]*?<\/form_rander>/g, '') .trim() } @@ -144,10 +142,11 @@ const playAnswerText = (text: string) => { if (!text) { text = '抱歉,没有查找到相关内容,请重新描述您的问题或提供更多信息。' } - // text 处理成纯文本 - text = markdownToPlainText(text) // 移除表单渲染器 text = removeFormRander(text) + // text 处理成纯文本 + text = markdownToPlainText(text) + // console.log(text) audioPlayerStatus.value = true if (props.tts_type === 'BROWSER') { if (text !== utterance.value?.text) { diff --git a/ui/src/request/index.ts b/ui/src/request/index.ts index 204fa297c..050b41e5d 100644 --- a/ui/src/request/index.ts +++ b/ui/src/request/index.ts @@ -225,8 +225,9 @@ export const exportExcel: ( params: any, loading?: NProgress | Ref ) => { - return promise(request({ url: url, method: 'get', params, responseType: 'blob' }), loading) - .then((res: any) => { + return promise(request({ url: url, method: 'get', params, responseType: 'blob' }), loading).then( + (res: any) => { + console.log(res) if (res) { const blob = new Blob([res], { type: 'application/vnd.ms-excel' @@ -239,8 +240,8 @@ export const exportExcel: ( window.URL.revokeObjectURL(link.href) } return true - }) - .catch((e) => {}) + } + ) } export const exportExcelPost: ( @@ -265,22 +266,20 @@ export const exportExcelPost: ( responseType: 'blob' }), loading - ) - .then((res: any) => { - if (res) { - const blob = new Blob([res], { - type: 'application/vnd.ms-excel' - }) - const link = document.createElement('a') - link.href = window.URL.createObjectURL(blob) - link.download = fileName - link.click() - // 释放内存 - window.URL.revokeObjectURL(link.href) - } - return true - }) - .catch((e) => {}) + ).then((res: any) => { + if (res) { + const blob = new Blob([res], { + type: 'application/vnd.ms-excel' + }) + const link = document.createElement('a') + link.href = window.URL.createObjectURL(blob) + link.download = fileName + link.click() + // 释放内存 + window.URL.revokeObjectURL(link.href) + } + 
return true + }) } export const download: ( diff --git a/ui/src/views/dataset/component/ParagraphList.vue b/ui/src/views/dataset/component/ParagraphList.vue index 3741f93dd..022bd8e35 100644 --- a/ui/src/views/dataset/component/ParagraphList.vue +++ b/ui/src/views/dataset/component/ParagraphList.vue @@ -48,7 +48,7 @@ import { cloneDeep } from 'lodash' import { ref, computed } from 'vue' import EditParagraphDialog from './EditParagraphDialog.vue' import { MsgConfirm } from '@/utils/message' -const page_size = ref(20) +const page_size = ref(30) const current_page = ref(1) const currentCIndex = ref(0) const EditParagraphDialogRef = ref() diff --git a/ui/src/views/dataset/index.vue b/ui/src/views/dataset/index.vue index d6d5b8a46..6f062af3c 100644 --- a/ui/src/views/dataset/index.vue +++ b/ui/src/views/dataset/index.vue @@ -160,7 +160,7 @@ const loading = ref(false) const datasetList = ref([]) const paginationConfig = reactive({ current_page: 1, - page_size: 20, + page_size: 30, total: 0 }) diff --git a/ui/src/views/document/index.vue b/ui/src/views/document/index.vue index c3e6052ef..a6df1fb0b 100644 --- a/ui/src/views/document/index.vue +++ b/ui/src/views/document/index.vue @@ -187,9 +187,10 @@ @@ -678,18 +679,24 @@ function deleteDocument(row: any) { 更新名称或状态 */ function updateData(documentId: string, data: any, msg: string) { - documentApi.putDocument(id, documentId, data, loading).then((res) => { - const index = documentData.value.findIndex((v) => v.id === documentId) - documentData.value.splice(index, 1, res.data) - MsgSuccess(msg) - }) + documentApi + .putDocument(id, documentId, data, loading) + .then((res) => { + const index = documentData.value.findIndex((v) => v.id === documentId) + documentData.value.splice(index, 1, res.data) + MsgSuccess(msg) + return true + }) + .catch(() => { + return false + }) } -function changeState(bool: Boolean, row: any) { +function changeState(row: any) { const obj = { - is_active: bool + is_active: !row.is_active } - const str = 
bool ? '启用成功' : '禁用成功' + const str = !row.is_active ? '启用成功' : '禁用成功' currentMouseId.value && updateData(row.id, obj, str) } diff --git a/ui/src/views/function-lib/index.vue b/ui/src/views/function-lib/index.vue index 43dfd1110..3e6770d77 100644 --- a/ui/src/views/function-lib/index.vue +++ b/ui/src/views/function-lib/index.vue @@ -142,7 +142,7 @@ const functionLibList = ref([]) const paginationConfig = reactive({ current_page: 1, - page_size: 20, + page_size: 30, total: 0 }) diff --git a/ui/src/views/paragraph/index.vue b/ui/src/views/paragraph/index.vue index 2cdc1b1de..19b39865e 100644 --- a/ui/src/views/paragraph/index.vue +++ b/ui/src/views/paragraph/index.vue @@ -104,8 +104,9 @@ >
@@ -162,7 +163,6 @@ -