From ba72889a183c611ec666ba329d23e9b0a4b8c850 Mon Sep 17 00:00:00 2001 From: liqiang-fit2cloud Date: Fri, 15 Mar 2024 13:03:01 +0800 Subject: [PATCH] =?UTF-8?q?build:=20allinone=E6=89=93=E5=8C=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 - installer/config.yaml | 20 +++++++ installer/init.sql | 5 ++ installer/install-python.sh | 101 ++++++++++++++++++++++++++++++++++++ installer/install_model.py | 69 ++++++++++++++++++++++++ installer/run-maxkb.sh | 16 ++++++ 6 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 installer/config.yaml create mode 100644 installer/init.sql create mode 100644 installer/install-python.sh create mode 100644 installer/install_model.py create mode 100644 installer/run-maxkb.sh diff --git a/.gitignore b/.gitignore index a3dcf531b..7147555f8 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ __pycache__/ # Distribution / packaging .Python -installer/ develop-eggs/ dist/ downloads/ diff --git a/installer/config.yaml b/installer/config.yaml new file mode 100644 index 000000000..9e19ef0b3 --- /dev/null +++ b/installer/config.yaml @@ -0,0 +1,20 @@ +# Email configuration +EMAIL_ADDRESS: ${EMAIL_ADDRESS} +EMAIL_USE_TLS: ${EMAIL_USE_TLS} +EMAIL_USE_SSL: ${EMAIL_USE_SSL} +EMAIL_HOST: ${EMAIL_HOST} +EMAIL_PORT: ${EMAIL_PORT} +EMAIL_HOST_USER: ${EMAIL_HOST_USER} +EMAIL_HOST_PASSWORD: ${EMAIL_HOST_PASSWORD} +# +# # Database connection information +DB_NAME: maxkb +DB_HOST: 127.0.0.1 +DB_PORT: 5432 +DB_USER: root +DB_PASSWORD: Password123@postgres +DB_ENGINE: django.db.backends.postgresql_psycopg2 +EMBEDDING_MODEL_PATH: /opt/maxkb/model/embedding +EMBEDDING_MODEL_NAME: /opt/maxkb/model/embedding/shibing624_text2vec-base-chinese + +DEBUG: false \ No newline at end of file diff --git a/installer/init.sql b/installer/init.sql new file mode 100644 index 000000000..98550891a --- /dev/null +++ b/installer/init.sql @@ -0,0 +1,5 @@ +CREATE DATABASE "maxkb"; + +\c "maxkb"; + +CREATE EXTENSION "vector" VERSION 
'0.5.1'; \ No newline at end of file diff --git a/installer/install-python.sh b/installer/install-python.sh new file mode 100644 index 000000000..0835f28cc --- /dev/null +++ b/installer/install-python.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +# ---- prepare python env --- # +# ---- copy from https://github.com/docker-library/python/blob/master/3.11/slim-bookworm/Dockerfile --- # + +export PATH=/usr/local/bin:$PATH + +export LANG=C.UTF-8 + +apt-get update; +apt-get install -y --no-install-recommends ca-certificates netbase tzdata +rm -rf /var/lib/apt/lists/* + +export GPG_KEY=A035C8C19219BA821ECEA86B64E628F8D684696D +export PYTHON_VERSION=3.11.8 + +savedAptMark="$(apt-mark showmanual)" +apt-get update +apt-get install -y --no-install-recommends dpkg-dev gcc gnupg libbluetooth-dev libbz2-dev libc6-dev libdb-dev libexpat1-dev libffi-dev libgdbm-dev liblzma-dev libncursesw5-dev libreadline-dev libsqlite3-dev libssl-dev make \ + tk-dev uuid-dev wget xz-utils zlib1g-dev +wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" +wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" +GNUPGHOME="$(mktemp -d)" +export GNUPGHOME +gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY" +gpg --batch --verify python.tar.xz.asc python.tar.xz +gpgconf --kill all +rm -rf "$GNUPGHOME" python.tar.xz.asc +mkdir -p /usr/src/python +tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz +rm python.tar.xz; +cd /usr/src/python +gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" +./configure --build="$gnuArch" --enable-loadable-sqlite-extensions --enable-optimizations --enable-option-checking=fatal --enable-shared --with-lto --with-system-expat --without-ensurepip +nproc="$(nproc)" # own statement (no line continuation above) so "make -j" gets a job count +EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)" +LDFLAGS="$(dpkg-buildflags --get LDFLAGS)" +LDFLAGS="${LDFLAGS:--Wl},--strip-all" +make -j 
"$nproc" "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" "LDFLAGS=${LDFLAGS:-}" "PROFILE_TASK=${PROFILE_TASK:-}" + +rm python +make -j "$nproc" "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" "PROFILE_TASK=${PROFILE_TASK:-}" python +make install +cd / +rm -rf /usr/src/python + +find /usr/local -depth \ + \( \ + \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \ + -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \ + \) -exec rm -rf '{}' +ldconfig +apt-mark auto '.*' > /dev/null +apt-mark manual $savedAptMark +find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec ldd '{}' ';' \ + | awk '/=>/ { so = $(NF-1); if (index(so, "/usr/local/") == 1) { next }; gsub("^/(usr/)?", "", so); printf "*%s\n", so }' \ + | sort -u \ + | xargs -r dpkg-query --search \ + | cut -d: -f1 \ + | sort -u \ + | xargs -r apt-mark manual +apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false +rm -rf /var/lib/apt/lists/* +python3 --version + +# make some useful symlinks that are expected to exist ("/usr/local/bin/python" and friends) + +for src in idle3 pydoc3 python3 python3-config; do \ + dst="$(echo "$src" | tr -d 3)"; \ + [ -s "/usr/local/bin/$src" ]; \ + [ ! 
-e "/usr/local/bin/$dst" ]; \ + ln -svT "$src" "/usr/local/bin/$dst"; \ +done + +# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value ''" +export PYTHON_PIP_VERSION=24.0 +# https://github.com/docker-library/python/issues/365 +export PYTHON_SETUPTOOLS_VERSION=65.5.1 +# https://github.com/pypa/get-pip +export PYTHON_GET_PIP_URL=https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py +export PYTHON_GET_PIP_SHA256=dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9 + + +savedAptMark="$(apt-mark showmanual)" +apt-get update +apt-get install -y --no-install-recommends wget + +wget -O get-pip.py "$PYTHON_GET_PIP_URL" +echo "$PYTHON_GET_PIP_SHA256 *get-pip.py" | sha256sum -c - +apt-mark auto '.*' > /dev/null +[ -z "$savedAptMark" ] || apt-mark manual $savedAptMark > /dev/null +apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false +rm -rf /var/lib/apt/lists/* + +export PYTHONDONTWRITEBYTECODE=1 + +python get-pip.py --disable-pip-version-check --no-cache-dir --no-compile "pip==$PYTHON_PIP_VERSION" "setuptools==$PYTHON_SETUPTOOLS_VERSION" + +rm -f get-pip.py + +pip --version \ No newline at end of file diff --git a/installer/install_model.py b/installer/install_model.py new file mode 100644 index 000000000..fb464611a --- /dev/null +++ b/installer/install_model.py @@ -0,0 +1,69 @@ +# coding=utf-8 +""" + @project: maxkb + @Author:虎 + @file: install_model.py + @date:2023/12/18 14:02 + @desc: fetch every entry of model_config (GPT-2 family tokenizers and the shibing624/text2vec-base-chinese embedding model) into ./model +""" +import json +import os.path +from pycrawlers import huggingface +from transformers import GPT2TokenizerFast +hg = huggingface() +prefix_dir = "./model" +model_config = [ + { + 'download_params': { + 'cache_dir': os.path.join(prefix_dir, 'base/hub'), + 'pretrained_model_name_or_path': 'gpt2' + }, + 'download_function': GPT2TokenizerFast.from_pretrained + }, + { + 'download_params': { + 'cache_dir': os.path.join(prefix_dir, 'base/hub'), + 'pretrained_model_name_or_path': 
'gpt2-medium' + }, + 'download_function': GPT2TokenizerFast.from_pretrained + }, + { + 'download_params': { + 'cache_dir': os.path.join(prefix_dir, 'base/hub'), + 'pretrained_model_name_or_path': 'gpt2-large' + }, + 'download_function': GPT2TokenizerFast.from_pretrained + }, + { + 'download_params': { + 'cache_dir': os.path.join(prefix_dir, 'base/hub'), + 'pretrained_model_name_or_path': 'gpt2-xl' + }, + 'download_function': GPT2TokenizerFast.from_pretrained + }, + { + 'download_params': { + 'cache_dir': os.path.join(prefix_dir, 'base/hub'), + 'pretrained_model_name_or_path': 'distilgpt2' + }, + 'download_function': GPT2TokenizerFast.from_pretrained + }, + { + 'download_params': { + 'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"], + 'file_save_paths': [os.path.join(prefix_dir, 'embedding',"shibing624_text2vec-base-chinese")] + }, + 'download_function': hg.get_batch_data + } + +] + + +def install(): + for model in model_config: + print(json.dumps(model.get('download_params'))) + model.get('download_function')(**model.get('download_params')) + + +if __name__ == '__main__': + install() diff --git a/installer/run-maxkb.sh b/installer/run-maxkb.sh new file mode 100644 index 000000000..4f388f695 --- /dev/null +++ b/installer/run-maxkb.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Start postgres +docker-entrypoint.sh postgres & + +# Wait until postgres reports ready on 127.0.0.1 +until pg_isready --host=127.0.0.1; do sleep 1 && echo "waiting for postgres"; done + +# Start MaxKB +python /opt/maxkb/app/main.py start & + +# Wait for any process to exit +wait -n + +# Exit with status of process that exited first +exit $? \ No newline at end of file