diff --git a/README.md b/README.md index 12535c263..8b2e4f4c3 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ docker run -d --name=maxkb -p 8080:8080 -v ~/.maxkb:/var/lib/postgresql/data 1pa 你也可以通过 [1Panel 应用商店](https://apps.fit2cloud.com/1panel) 快速部署 MaxKB + Ollama + Llama 2,30 分钟内即可上线基于本地大模型的知识库问答系统,并嵌入到第三方业务系统中。 +如果是内网环境,推荐使用 [离线安装包](https://community.fit2cloud.com/#/products/maxkb/downloads) 进行安装部署。 + 你也可以在线体验:[DataEase 小助手](https://dataease.io/docs/v2/),它是基于 MaxKB 搭建的智能问答系统,已经嵌入到 DataEase 产品及在线文档中。 如你有更多问题,可以查看使用手册,或者通过论坛与我们交流。 diff --git a/apps/application/serializers/application_serializers.py b/apps/application/serializers/application_serializers.py index cc4430f8d..88293462d 100644 --- a/apps/application/serializers/application_serializers.py +++ b/apps/application/serializers/application_serializers.py @@ -317,9 +317,9 @@ class ApplicationSerializer(serializers.Serializer): id = serializers.CharField(required=True, error_messages=ErrMessage.uuid("应用id")) user_id = serializers.UUIDField(required=False, error_messages=ErrMessage.uuid("用户id")) query_text = serializers.CharField(required=True, error_messages=ErrMessage.char("查询文本")) - top_number = serializers.IntegerField(required=True, max_value=10, min_value=1, + top_number = serializers.IntegerField(required=True, max_value=100, min_value=1, error_messages=ErrMessage.integer("topN")) - similarity = serializers.FloatField(required=True, max_value=1, min_value=0, + similarity = serializers.FloatField(required=True, max_value=2, min_value=0, error_messages=ErrMessage.float("相关度")) search_mode = serializers.CharField(required=True, validators=[ validators.RegexValidator(regex=re.compile("^embedding|keywords|blend$"), diff --git a/apps/application/serializers/chat_message_serializers.py b/apps/application/serializers/chat_message_serializers.py index f8c80a865..f529e7e8a 100644 --- a/apps/application/serializers/chat_message_serializers.py +++ b/apps/application/serializers/chat_message_serializers.py @@ -179,13 +179,12 @@ class ChatMessageSerializer(serializers.Serializer): return chat_info def chat(self): - self.is_valid(raise_exception=True) + chat_info = self.is_valid(raise_exception=True) message = self.data.get('message') re_chat = self.data.get('re_chat') stream = self.data.get('stream') client_id = self.data.get('client_id') client_type = self.data.get('client_type') - chat_info = self.is_valid(raise_exception=True) pipeline_manage_builder = PipelineManage.builder() # 如果开启了问题优化,则添加上问题优化步骤 if chat_info.application.problem_optimization: diff --git a/apps/application/serializers/chat_serializers.py b/apps/application/serializers/chat_serializers.py index bcbb04e29..3af6b0624 100644 --- a/apps/application/serializers/chat_serializers.py +++ b/apps/application/serializers/chat_serializers.py @@ -422,11 +422,11 @@ class ChatRecordSerializer(serializers.Serializer): return True class ImproveSerializer(serializers.Serializer): - title = serializers.CharField(required=False, allow_null=True, allow_blank=True, + title = serializers.CharField(required=False, max_length=256, allow_null=True, allow_blank=True, error_messages=ErrMessage.char("段落标题")) content = serializers.CharField(required=True, error_messages=ErrMessage.char("段落内容")) - problem_text = serializers.CharField(required=False, allow_null=True, allow_blank=True, + problem_text = serializers.CharField(required=False, max_length=256, allow_null=True, allow_blank=True, error_messages=ErrMessage.char("问题")) class ParagraphModel(serializers.ModelSerializer): diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py index ea7c9b508..dea53cb13 100644 --- a/apps/common/event/listener_manage.py +++ b/apps/common/event/listener_manage.py @@ -6,6 +6,7 @@ @date:2023/10/20 14:01 @desc: """ +import datetime import logging import os import traceback @@ -143,7 +144,8 @@ class ListenerManagement: status = Status.error finally: # 修改状态 - QuerySet(Document).filter(id=document_id).update(**{'status': status}) + QuerySet(Document).filter(id=document_id).update( + **{'status': status, 'update_time': datetime.datetime.now()}) QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status}) max_kb.info(f"结束--->向量化文档:{document_id}") diff --git a/apps/common/handle/impl/html_split_handle.py b/apps/common/handle/impl/html_split_handle.py index 564f6db49..3116aabfd 100644 --- a/apps/common/handle/impl/html_split_handle.py +++ b/apps/common/handle/impl/html_split_handle.py @@ -37,14 +37,9 @@ def get_encoding(buffer): class HTMLSplitHandle(BaseSplitHandle): def support(self, file, get_buffer): - buffer = get_buffer(file) file_name: str = file.name.lower() if file_name.endswith(".html"): return True - result = detect(buffer) - if result['encoding'] is not None and result['confidence'] is not None and result['encoding'] != 'ascii' and \ - result['confidence'] > 0.5: - return True return False def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image): diff --git a/apps/common/init/init_doc.py b/apps/common/init/init_doc.py index 60ef21bcf..993dcca1b 100644 --- a/apps/common/init/init_doc.py +++ b/apps/common/init/init_doc.py @@ -8,7 +8,7 @@ """ import hashlib -from django.urls import re_path, path +from django.urls import re_path, path, URLPattern from drf_yasg import openapi from drf_yasg.views import get_schema_view from rest_framework import permissions @@ -46,11 +46,15 @@ def init_chat_doc(application_urlpatterns, patterns): public=True, permission_classes=[permissions.AllowAny], authentication_classes=[AnonymousAuthentication], - patterns=[url for url in patterns if - url.name is not None and ['application/message', 'application/open', - 'application/profile'].__contains__( - url.name)] + patterns=[ + URLPattern(pattern='api/' + str(url.pattern), callback=url.callback, default_args=url.default_args, + name=url.name) + for url in patterns if + url.name is not None and ['application/message', 'application/open', + 'application/profile'].__contains__( + url.name)] ) + application_urlpatterns += [ path('doc/chat/', chat_schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'), path('redoc/chat/', chat_schema_view.with_ui('redoc', cache_timeout=0), name='schema-redoc'), diff --git a/apps/common/util/split_model.py b/apps/common/util/split_model.py index 19b265fc6..10e827c30 100644 --- a/apps/common/util/split_model.py +++ b/apps/common/util/split_model.py @@ -163,7 +163,7 @@ def parse_level(text, pattern: str): :param pattern: 正则 :return: 符合正则的文本 """ - level_content_list = list(map(to_tree_obj, re_findall(pattern, text))) + level_content_list = list(map(to_tree_obj, [r[0:255] for r in re_findall(pattern, text) if r is not None])) return list(map(filter_special_symbol, level_content_list)) diff --git a/apps/dataset/serializers/dataset_serializers.py b/apps/dataset/serializers/dataset_serializers.py index 6894d9470..1e60a9103 100644 --- a/apps/dataset/serializers/dataset_serializers.py +++ b/apps/dataset/serializers/dataset_serializers.py @@ -535,9 +535,9 @@ class DataSetSerializers(serializers.ModelSerializer): id = serializers.CharField(required=True, error_messages=ErrMessage.char("id")) user_id = serializers.UUIDField(required=False, error_messages=ErrMessage.char("用户id")) query_text = serializers.CharField(required=True, error_messages=ErrMessage.char("查询文本")) - top_number = serializers.IntegerField(required=True, max_value=10, min_value=1, + top_number = serializers.IntegerField(required=True, max_value=100, min_value=1, error_messages=ErrMessage.char("响应Top")) - similarity = serializers.FloatField(required=True, max_value=1, min_value=0, + similarity = serializers.FloatField(required=True, max_value=2, min_value=0, error_messages=ErrMessage.char("相似度")) search_mode = serializers.CharField(required=True, validators=[ validators.RegexValidator(regex=re.compile("^embedding|keywords|blend$"), diff --git a/apps/smartdoc/conf.py b/apps/smartdoc/conf.py index 27e1e8b08..7b0188988 100644 --- a/apps/smartdoc/conf.py +++ b/apps/smartdoc/conf.py @@ -13,6 +13,7 @@ import os import re from importlib import import_module from urllib.parse import urljoin, urlparse + import yaml BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -75,25 +76,18 @@ class DoesNotExist(Exception): class Config(dict): defaults = { # 数据库相关配置 - "DB_HOST": "", - "DB_PORT": "", - "DB_USER": "", - "DB_PASSWORD": "", + "DB_HOST": "127.0.0.1", + "DB_PORT": 5432, + "DB_USER": "root", + "DB_PASSWORD": "Password123@postgres", "DB_ENGINE": "django.db.backends.postgresql_psycopg2", - # 邮件相关配置 - "EMAIL_ADDRESS": "", - "EMAIL_USE_TLS": False, - "EMAIL_USE_SSL": True, - "EMAIL_HOST": "", - "EMAIL_PORT": 465, - "EMAIL_HOST_USER": "", - "EMAIL_HOST_PASSWORD": "", # 向量模型 "EMBEDDING_MODEL_NAME": "shibing624/text2vec-base-chinese", "EMBEDDING_DEVICE": "cpu", "EMBEDDING_MODEL_PATH": os.path.join(PROJECT_DIR, 'models'), # 向量库配置 - "VECTOR_STORE_NAME": 'pg_vector' + "VECTOR_STORE_NAME": 'pg_vector', + "DEBUG": False } @@ -180,8 +174,36 @@ class ConfigManager: loaded = self.from_yaml(i) if loaded: return True + msg = f""" - return False + Error: No config file found. + + You can run `cp config_example.yml {self.root_path}/config.yml`, and edit it. + + """ + raise ImportError(msg) + + def load_from_env(self): + keys = os.environ.keys() + config = {key.replace('MAXKB_', ''): os.environ.get(key) for key in keys if key.startswith('MAXKB_')} + if len(config.keys()) <= 1: + msg = f""" + + Error: No config env found. + + Please set environment variables + MAXKB_CONFIG_TYPE: 配置文件读取方式 FILE: 使用配置文件配置 ENV: 使用ENV配置 + MAXKB_DB_NAME: 数据库名称 + MAXKB_DB_HOST: 数据库主机 + MAXKB_DB_PORT: 数据库端口 + MAXKB_DB_USER: 数据库用户名 + MAXKB_DB_PASSWORD: 数据库密码 + MAXKB_EMBEDDING_MODEL_PATH: 向量模型目录 + MAXKB_EMBEDDING_MODEL_NAME: 向量模型名称 + """ + raise ImportError(msg) + self.from_mapping(config) + return True @classmethod def load_user_config(cls, root_path=None, config_class=None): @@ -190,15 +212,10 @@ class ConfigManager: if not root_path: root_path = PROJECT_DIR manager = cls(root_path=root_path) - if manager.load_from_yml(): - config = manager.config + config_type = os.environ.get('MAXKB_CONFIG_TYPE') + if config_type is None or config_type != 'ENV': + manager.load_from_yml() else: - msg = f""" - - Error: No config file found. - - You can run `cp config_example.yml {root_path}/config.yml`, and edit it. - - """ - raise ImportError(msg) + manager.load_from_env() + config = manager.config return config diff --git a/config_example.yml b/config_example.yml index b43106427..e262de190 100644 --- a/config_example.yml +++ b/config_example.yml @@ -1,12 +1,3 @@ -# 邮箱配置 -EMAIL_ADDRESS: -EMAIL_USE_TLS: False -EMAIL_USE_SSL: True -EMAIL_HOST: smtp.qq.com -EMAIL_PORT: 465 -EMAIL_HOST_USER: -EMAIL_HOST_PASSWORD: - # 数据库链接信息 DB_NAME: maxkb DB_HOST: localhost diff --git a/installer/Dockerfile b/installer/Dockerfile index 6462a8f5d..90eeba54a 100644 --- a/installer/Dockerfile +++ b/installer/Dockerfile @@ -17,7 +17,6 @@ RUN apt-get update && \ COPY . /opt/maxkb/app RUN mkdir -p /opt/maxkb/app /opt/maxkb/model /opt/maxkb/conf && \ - cp -f /opt/maxkb/app/installer/config.yaml /opt/maxkb/conf && \ rm -rf /opt/maxkb/app/ui COPY --from=web-build ui /opt/maxkb/app/ui WORKDIR /opt/maxkb/app @@ -33,7 +32,16 @@ ARG DOCKER_IMAGE_TAG=dev \ BUILD_AT \ GITHUB_COMMIT -ENV MAXKB_VERSION ${DOCKER_IMAGE_TAG} (build at ${BUILD_AT}, commit: ${GITHUB_COMMIT}) +ENV MAXKB_VERSION="${DOCKER_IMAGE_TAG} (build at ${BUILD_AT}, commit: ${GITHUB_COMMIT})" \ + MAXKB_CONFIG_TYPE=ENV \ + MAXKB_DB_NAME=maxkb \ + MAXKB_DB_HOST=127.0.0.1 \ + MAXKB_DB_PORT=5432 \ + MAXKB_DB_USER=root \ + MAXKB_DB_PASSWORD=Password123@postgres \ + MAXKB_EMBEDDING_MODEL_NAME=/opt/maxkb/model/embedding/shibing624_text2vec-base-chinese \ + MAXKB_EMBEDDING_MODEL_PATH=/opt/maxkb/model/embedding + WORKDIR /opt/maxkb/app COPY --from=stage-build /opt/maxkb /opt/maxkb COPY --from=stage-build /opt/py3 /opt/py3 diff --git a/ui/src/assets/html-icon.svg b/ui/src/assets/html-icon.svg new file mode 100644 index 000000000..b59a48826 --- /dev/null +++ b/ui/src/assets/html-icon.svg @@ -0,0 +1,6 @@ + diff --git a/ui/src/assets/icon_condition.svg b/ui/src/assets/icon_condition.svg new file mode 100644 index 000000000..2bc80a212 --- /dev/null +++ b/ui/src/assets/icon_condition.svg @@ -0,0 +1,3 @@ + diff --git a/ui/src/assets/icon_hi.svg b/ui/src/assets/icon_hi.svg new file mode 100644 index 000000000..84bb36ac2 --- /dev/null +++ b/ui/src/assets/icon_hi.svg @@ -0,0 +1,5 @@ + diff --git a/ui/src/assets/icon_start.svg b/ui/src/assets/icon_start.svg new file mode 100644 index 000000000..0b8d73064 --- /dev/null +++ b/ui/src/assets/icon_start.svg @@ -0,0 +1,4 @@ + diff --git a/ui/src/components/ai-chat/OperationButton.vue b/ui/src/components/ai-chat/OperationButton.vue index 120641b87..a8ac52255 100644 --- a/ui/src/components/ai-chat/OperationButton.vue +++ b/ui/src/components/ai-chat/OperationButton.vue @@ -5,8 +5,8 @@
{{$t('views.applicationOverview.appInfo.EmbedDialog.embedDialogTitle')}}
++ {{ $t('views.applicationOverview.appInfo.EmbedDialog.fullscreenModeTitle') }} +
- {{$t('views.applicationOverview.appInfo.EmbedDialog.floatingModeTitle')}}
++ {{ $t('views.applicationOverview.appInfo.EmbedDialog.floatingModeTitle') }} +