From 985d476207dba40f25fa5f8fc4af8c5fdc483617 Mon Sep 17 00:00:00 2001 From: liqiang-fit2cloud Date: Fri, 15 Mar 2024 12:58:46 +0800 Subject: [PATCH] =?UTF-8?q?build:=20allinone=E6=89=93=E5=8C=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 174 ++--------------------------------------- install_model.py | 69 ---------------- installer/config.yaml | 20 ----- installer/init.sql | 5 -- installer/run-maxkb.sh | 16 ---- 5 files changed, 8 insertions(+), 276 deletions(-) delete mode 100644 install_model.py delete mode 100644 installer/config.yaml delete mode 100644 installer/init.sql delete mode 100644 installer/run-maxkb.sh diff --git a/Dockerfile b/Dockerfile index c7cab0d44..db573c475 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.11-slim as vector-model-build -COPY install_model.py install_model.py +COPY build/install_model.py install_model.py RUN pip3 install --upgrade pip setuptools && \ pip install pycrawlers && \ pip install transformers && \ @@ -20,7 +20,7 @@ RUN apt-get update RUN apt-get install postgresql-15-pgvector -COPY installer/init.sql /docker-entrypoint-initdb.d +COPY build/init.sql /docker-entrypoint-initdb.d RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone @@ -28,179 +28,21 @@ ENV POSTGRES_USER root ENV POSTGRES_PASSWORD Password123@postgres + # ---- prepare python env --- # -# ---- copy from https://github.com/docker-library/python/blob/master/3.11/slim-bookworm/Dockerfile --- # -# ensure local python is preferred over distribution python ENV PATH /usr/local/bin:$PATH - -# http://bugs.python.org/issue19846 -# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK. -ENV LANG C.UTF-8 - -# runtime dependencies -RUN set -eux; \ - apt-get update; \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - netbase \ - tzdata \ - ; \ - rm -rf /var/lib/apt/lists/* - -ENV GPG_KEY A035C8C19219BA821ECEA86B64E628F8D684696D +COPY build/install-python.sh /install-python.sh +RUN chmod 755 /install-python.sh; bash -c "/install-python.sh > /dev/null 2>&1" ; rm -f /install-python.sh ENV PYTHON_VERSION 3.11.8 -RUN set -eux; \ - \ - savedAptMark="$(apt-mark showmanual)"; \ - apt-get update; \ - apt-get install -y --no-install-recommends \ - dpkg-dev \ - gcc \ - gnupg \ - libbluetooth-dev \ - libbz2-dev \ - libc6-dev \ - libdb-dev \ - libexpat1-dev \ - libffi-dev \ - libgdbm-dev \ - liblzma-dev \ - libncursesw5-dev \ - libreadline-dev \ - libsqlite3-dev \ - libssl-dev \ - make \ - tk-dev \ - uuid-dev \ - wget \ - xz-utils \ - zlib1g-dev \ - ; \ - \ - wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz"; \ - wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc"; \ - GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; \ - gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY"; \ - gpg --batch --verify python.tar.xz.asc python.tar.xz; \ - gpgconf --kill all; \ - rm -rf "$GNUPGHOME" python.tar.xz.asc; \ - mkdir -p /usr/src/python; \ - tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz; \ - rm python.tar.xz; \ - \ - cd /usr/src/python; \ - gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; \ - ./configure \ - --build="$gnuArch" \ - --enable-loadable-sqlite-extensions \ - --enable-optimizations \ - --enable-option-checking=fatal \ - --enable-shared \ - --with-lto \ - --with-system-expat \ - --without-ensurepip \ - ; \ - nproc="$(nproc)"; \ - EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"; \ - LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"; \ - LDFLAGS="${LDFLAGS:--Wl},--strip-all"; \ - make -j "$nproc" \ - "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \ - "LDFLAGS=${LDFLAGS:-}" \ - "PROFILE_TASK=${PROFILE_TASK:-}" \ - ; \ -# https://github.com/docker-library/python/issues/784 -# prevent accidental usage of a system installed libpython of the same version - rm python; \ - make -j "$nproc" \ - "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \ - "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" \ - "PROFILE_TASK=${PROFILE_TASK:-}" \ - python \ - ; \ - make install; \ - \ - cd /; \ - rm -rf /usr/src/python; \ - \ - find /usr/local -depth \ - \( \ - \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \ - -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \ - \) -exec rm -rf '{}' + \ - ; \ - \ - ldconfig; \ - \ - apt-mark auto '.*' > /dev/null; \ - apt-mark manual $savedAptMark; \ - find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec ldd '{}' ';' \ - | awk '/=>/ { so = $(NF-1); if (index(so, "/usr/local/") == 1) { next }; gsub("^/(usr/)?", "", so); printf "*%s\n", so }' \ - | sort -u \ - | xargs -r dpkg-query --search \ - | cut -d: -f1 \ - | sort -u \ - | xargs -r apt-mark manual \ - ; \ - apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \ - rm -rf /var/lib/apt/lists/*; \ - \ - python3 --version - -# make some useful symlinks that are expected to exist ("/usr/local/bin/python" and friends) -RUN set -eux; \ - for src in idle3 pydoc3 python3 python3-config; do \ - dst="$(echo "$src" | tr -d 3)"; \ - [ -s "/usr/local/bin/$src" ]; \ - [ ! -e "/usr/local/bin/$dst" ]; \ - ln -svT "$src" "/usr/local/bin/$dst"; \ - done - -# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value ''" -ENV PYTHON_PIP_VERSION 24.0 -# https://github.com/docker-library/python/issues/365 -ENV PYTHON_SETUPTOOLS_VERSION 65.5.1 -# https://github.com/pypa/get-pip -ENV PYTHON_GET_PIP_URL https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py -ENV PYTHON_GET_PIP_SHA256 dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9 - -RUN set -eux; \ - \ - savedAptMark="$(apt-mark showmanual)"; \ - apt-get update; \ - apt-get install -y --no-install-recommends wget; \ - \ - wget -O get-pip.py "$PYTHON_GET_PIP_URL"; \ - echo "$PYTHON_GET_PIP_SHA256 *get-pip.py" | sha256sum -c -; \ - \ - apt-mark auto '.*' > /dev/null; \ - [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark > /dev/null; \ - apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \ - rm -rf /var/lib/apt/lists/*; \ - \ - export PYTHONDONTWRITEBYTECODE=1; \ - \ - python get-pip.py \ - --disable-pip-version-check \ - --no-cache-dir \ - --no-compile \ - "pip==$PYTHON_PIP_VERSION" \ - "setuptools==$PYTHON_SETUPTOOLS_VERSION" \ - ; \ - rm -f get-pip.py; \ - \ - pip --version - # ---- build maxkb --- # - # 创建工作目录 RUN mkdir -p /opt/maxkb/app && mkdir -p /opt/maxkb/model && mkdir -p /opt/maxkb/conf VOLUME /opt/maxkb # 拷贝项目 COPY . /opt/maxkb/app -COPY installer/config.yaml /opt/maxkb/conf -RUN rm -rf /opt/maxkb/app/ui /opt/maxkb/app/installer +COPY build/config.yaml /opt/maxkb/conf +RUN rm -rf /opt/maxkb/app/ui /opt/maxkb/app/build COPY --from=vector-model-build model /opt/maxkb/app/model COPY --from=web-build ui /opt/maxkb/app/ui RUN ls -la /opt/maxkb/app @@ -222,7 +64,7 @@ RUN poetry export -f requirements.txt --output requirements.txt --without-hashes RUN pip3 install --no-cache-dir -r requirements.txt EXPOSE 8000 # 启动命令 -COPY installer/run-maxkb.sh /usr/bin/ +COPY build/run-maxkb.sh /usr/bin/ RUN chmod 755 /usr/bin/run-maxkb.sh ENTRYPOINT ["bash", "-c"] CMD [ "/usr/bin/run-maxkb.sh" ] \ No newline at end of file diff --git a/install_model.py b/install_model.py deleted file mode 100644 index fb464611a..000000000 --- a/install_model.py +++ /dev/null @@ -1,69 +0,0 @@ -# coding=utf-8 -""" - @project: maxkb - @Author:虎 - @file: install_model.py - @date:2023/12/18 14:02 - @desc: -""" -import json -import os.path -from pycrawlers import huggingface -from transformers import GPT2TokenizerFast -hg = huggingface() -prefix_dir = "./model" -model_config = [ - { - 'download_params': { - 'cache_dir': os.path.join(prefix_dir, 'base/hub'), - 'pretrained_model_name_or_path': 'gpt2' - }, - 'download_function': GPT2TokenizerFast.from_pretrained - }, - { - 'download_params': { - 'cache_dir': os.path.join(prefix_dir, 'base/hub'), - 'pretrained_model_name_or_path': 'gpt2-medium' - }, - 'download_function': GPT2TokenizerFast.from_pretrained - }, - { - 'download_params': { - 'cache_dir': os.path.join(prefix_dir, 'base/hub'), - 'pretrained_model_name_or_path': 'gpt2-large' - }, - 'download_function': GPT2TokenizerFast.from_pretrained - }, - { - 'download_params': { - 'cache_dir': os.path.join(prefix_dir, 'base/hub'), - 'pretrained_model_name_or_path': 'gpt2-xl' - }, - 'download_function': GPT2TokenizerFast.from_pretrained - }, - { - 'download_params': { - 'cache_dir': os.path.join(prefix_dir, 'base/hub'), - 'pretrained_model_name_or_path': 'distilgpt2' - }, - 'download_function': GPT2TokenizerFast.from_pretrained - }, - { - 'download_params': { - 'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"], - 'file_save_paths': [os.path.join(prefix_dir, 'embedding',"shibing624_text2vec-base-chinese")] - }, - 'download_function': hg.get_batch_data - } - -] - - -def install(): - for model in model_config: - print(json.dumps(model.get('download_params'))) - model.get('download_function')(**model.get('download_params')) - - -if __name__ == '__main__': - install() diff --git a/installer/config.yaml b/installer/config.yaml deleted file mode 100644 index 9e19ef0b3..000000000 --- a/installer/config.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# 邮箱配置 -EMAIL_ADDRESS: ${EMAIL_ADDRESS} -EMAIL_USE_TLS: ${EMAIL_USE_TLS} -EMAIL_USE_SSL: ${EMAIL_USE_SSL} -EMAIL_HOST: ${EMAIL_HOST} -EMAIL_PORT: ${EMAIL_PORT} -EMAIL_HOST_USER: ${EMAIL_HOST_USER} -EMAIL_HOST_PASSWORD: ${EMAIL_HOST_PASSWORD} -# -# # 数据库链接信息 -DB_NAME: maxkb -DB_HOST: 127.0.0.1 -DB_PORT: 5432 -DB_USER: root -DB_PASSWORD: Password123@postgres -DB_ENGINE: django.db.backends.postgresql_psycopg2 -EMBEDDING_MODEL_PATH: /opt/maxkb/model/embedding -EMBEDDING_MODEL_NAME: /opt/maxkb/model/embedding/shibing624_text2vec-base-chinese - -DEBUG: false \ No newline at end of file diff --git a/installer/init.sql b/installer/init.sql deleted file mode 100644 index 98550891a..000000000 --- a/installer/init.sql +++ /dev/null @@ -1,5 +0,0 @@ -CREATE DATABASE "maxkb"; - -\c "maxkb"; - -CREATE EXTENSION "vector" VERSION '0.5.1'; \ No newline at end of file diff --git a/installer/run-maxkb.sh b/installer/run-maxkb.sh deleted file mode 100644 index 4f388f695..000000000 --- a/installer/run-maxkb.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# Start postgress -docker-entrypoint.sh postgres & - -# Wait postgress -until pg_isready --host=127.0.0.1; do sleep 1 && echo "waiting for postgres"; done - -# Start MaxKB -python /opt/maxkb/app/main.py start & - -# Wait for any process to exit -wait -n - -# Exit with status of process that exited first -exit $? \ No newline at end of file