build: allinone打包

This commit is contained in:
liqiang-fit2cloud 2024-03-15 12:58:46 +08:00
parent cbd33e3136
commit 985d476207
5 changed files with 8 additions and 276 deletions

View File

@ -1,5 +1,5 @@
# Build stage whose `model` directory is later copied into the final image.
# The AS keyword is upper-cased so its casing matches FROM.
FROM python:3.11-slim AS vector-model-build
COPY install_model.py install_model.py
COPY build/install_model.py install_model.py
RUN pip3 install --upgrade pip setuptools && \
pip install pycrawlers && \
pip install transformers && \
@ -20,7 +20,7 @@ RUN apt-get update
# Install the pgvector extension package for PostgreSQL 15.
# -y keeps apt non-interactive so the build cannot abort on a confirmation prompt.
RUN apt-get install -y postgresql-15-pgvector
COPY installer/init.sql /docker-entrypoint-initdb.d
COPY build/init.sql /docker-entrypoint-initdb.d
# Set the image's local time zone to Asia/Shanghai: /etc/localtime is linked to
# the zoneinfo entry and the zone name is written to /etc/timezone.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone
@ -28,179 +28,21 @@ ENV POSTGRES_USER root
# NOTE(review): this default database password is baked into the image and is
# visible in the image metadata; the same value appears as DB_PASSWORD in
# config.yaml, so the two must be changed together.
ENV POSTGRES_PASSWORD=Password123@postgres
# ---- prepare python env --- #
# ---- copy from https://github.com/docker-library/python/blob/master/3.11/slim-bookworm/Dockerfile --- #
# ENV uses the key=value form; the space-separated form is deprecated.
# ensure local python is preferred over distribution python
ENV PATH=/usr/local/bin:$PATH
# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG=C.UTF-8
# runtime dependencies
# Refreshes the apt index, installs ca-certificates, netbase and tzdata without
# recommended extras, and removes the apt package lists in the same layer.
# set -eux makes the step stop on the first failing command and echo each one.
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates \
netbase \
tzdata \
; \
rm -rf /var/lib/apt/lists/*
# Key that gpg fetches and uses to verify the downloaded Python source tarball.
ENV GPG_KEY=A035C8C19219BA821ECEA86B64E628F8D684696D
# Install Python through the bundled helper script (its output is discarded).
# The steps are chained with && so that a failing install stops the build
# instead of being hidden by the exit status of the trailing rm.
COPY build/install-python.sh /install-python.sh
RUN chmod 755 /install-python.sh && bash -c "/install-python.sh > /dev/null 2>&1" && rm -f /install-python.sh
# Python release that is downloaded and built from source.
ENV PYTHON_VERSION=3.11.8
# Build Python $PYTHON_VERSION from source in a single layer:
#   1. remember which apt packages were manually installed, then add the build toolchain;
#   2. download the source tarball and its signature, and verify it with $GPG_KEY;
#   3. configure, build (twice, see the inline note) and install;
#   4. delete test directories, bytecode files and static libpython from /usr/local;
#   5. keep only the packages that provide shared libraries the new executables
#      link against, purge everything else that was auto-marked, and clear apt lists.
RUN set -eux; \
\
savedAptMark="$(apt-mark showmanual)"; \
apt-get update; \
apt-get install -y --no-install-recommends \
dpkg-dev \
gcc \
gnupg \
libbluetooth-dev \
libbz2-dev \
libc6-dev \
libdb-dev \
libexpat1-dev \
libffi-dev \
libgdbm-dev \
liblzma-dev \
libncursesw5-dev \
libreadline-dev \
libsqlite3-dev \
libssl-dev \
make \
tk-dev \
uuid-dev \
wget \
xz-utils \
zlib1g-dev \
; \
\
# fetch the tarball and detached signature, verify in a throwaway GNUPGHOME
wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz"; \
wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc"; \
GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; \
gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY"; \
gpg --batch --verify python.tar.xz.asc python.tar.xz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" python.tar.xz.asc; \
mkdir -p /usr/src/python; \
tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz; \
rm python.tar.xz; \
\
cd /usr/src/python; \
gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; \
./configure \
--build="$gnuArch" \
--enable-loadable-sqlite-extensions \
--enable-optimizations \
--enable-option-checking=fatal \
--enable-shared \
--with-lto \
--with-system-expat \
--without-ensurepip \
; \
nproc="$(nproc)"; \
EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"; \
LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"; \
LDFLAGS="${LDFLAGS:--Wl},--strip-all"; \
make -j "$nproc" \
"EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
"LDFLAGS=${LDFLAGS:-}" \
"PROFILE_TASK=${PROFILE_TASK:-}" \
; \
# https://github.com/docker-library/python/issues/784
# prevent accidental usage of a system installed libpython of the same version
rm python; \
make -j "$nproc" \
"EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
"LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" \
"PROFILE_TASK=${PROFILE_TASK:-}" \
python \
; \
make install; \
\
cd /; \
rm -rf /usr/src/python; \
\
# drop test/tests/idle_test directories, *.pyc/*.pyo files and libpython*.a
find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
-o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \
\) -exec rm -rf '{}' + \
; \
\
ldconfig; \
\
# mark everything auto, restore the saved manual marks, then additionally mark
# the packages owning the shared libraries reported by ldd so the purge keeps them
apt-mark auto '.*' > /dev/null; \
apt-mark manual $savedAptMark; \
find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec ldd '{}' ';' \
| awk '/=>/ { so = $(NF-1); if (index(so, "/usr/local/") == 1) { next }; gsub("^/(usr/)?", "", so); printf "*%s\n", so }' \
| sort -u \
| xargs -r dpkg-query --search \
| cut -d: -f1 \
| sort -u \
| xargs -r apt-mark manual \
; \
apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \
rm -rf /var/lib/apt/lists/*; \
\
# smoke test: the freshly installed interpreter must start
python3 --version
# make some useful symlinks that are expected to exist ("/usr/local/bin/python" and friends)
# For every versioned tool name the "3" is stripped to obtain the alias name.
# The two bare [ ] tests act as assertions under set -e: the source must exist
# and be non-empty, and the alias must not exist yet. The link target is the
# bare file name, so each alias is a relative symlink inside /usr/local/bin.
RUN set -eux; \
for src in idle3 pydoc3 python3 python3-config; do \
dst="$(echo "$src" | tr -d 3)"; \
[ -s "/usr/local/bin/$src" ]; \
[ ! -e "/usr/local/bin/$dst" ]; \
ln -svT "$src" "/usr/local/bin/$dst"; \
done
# Pinned versions and download source for the pip bootstrap step.
# ENV uses the key=value form; the space-separated form is deprecated.
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION=24.0
# https://github.com/docker-library/python/issues/365
ENV PYTHON_SETUPTOOLS_VERSION=65.5.1
# https://github.com/pypa/get-pip
ENV PYTHON_GET_PIP_URL=https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py
ENV PYTHON_GET_PIP_SHA256=dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9
# Bootstrap pip in a single layer: temporarily install wget, download
# get-pip.py and check it against PYTHON_GET_PIP_SHA256, purge wget again by
# restoring the saved apt marks, then install the pinned pip and setuptools
# versions without a pip cache and without compiling bytecode.
RUN set -eux; \
\
savedAptMark="$(apt-mark showmanual)"; \
apt-get update; \
apt-get install -y --no-install-recommends wget; \
\
wget -O get-pip.py "$PYTHON_GET_PIP_URL"; \
echo "$PYTHON_GET_PIP_SHA256 *get-pip.py" | sha256sum -c -; \
\
apt-mark auto '.*' > /dev/null; \
[ -z "$savedAptMark" ] || apt-mark manual $savedAptMark > /dev/null; \
apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \
rm -rf /var/lib/apt/lists/*; \
\
export PYTHONDONTWRITEBYTECODE=1; \
\
python get-pip.py \
--disable-pip-version-check \
--no-cache-dir \
--no-compile \
"pip==$PYTHON_PIP_VERSION" \
"setuptools==$PYTHON_SETUPTOOLS_VERSION" \
; \
rm -f get-pip.py; \
\
# smoke test: pip must be runnable
pip --version
# ---- build maxkb --- #
# Create the working directories
RUN mkdir -p /opt/maxkb/app && mkdir -p /opt/maxkb/model && mkdir -p /opt/maxkb/conf
# NOTE(review): this VOLUME is declared before the COPY/RUN steps below write
# into /opt/maxkb; confirm with the builder in use that those later writes are
# kept in the image.
VOLUME /opt/maxkb
# Copy the project
COPY . /opt/maxkb/app
COPY installer/config.yaml /opt/maxkb/conf
RUN rm -rf /opt/maxkb/app/ui /opt/maxkb/app/installer
COPY build/config.yaml /opt/maxkb/conf
RUN rm -rf /opt/maxkb/app/ui /opt/maxkb/app/build
# Pull in the artifacts produced by the vector-model-build and web-build stages.
COPY --from=vector-model-build model /opt/maxkb/app/model
COPY --from=web-build ui /opt/maxkb/app/ui
# List the application directory in the build log.
RUN ls -la /opt/maxkb/app
@ -222,7 +64,7 @@ RUN poetry export -f requirements.txt --output requirements.txt --without-hashes
# Install the Python dependencies from requirements.txt without keeping pip's cache.
RUN pip3 install --no-cache-dir -r requirements.txt
# Documents port 8000 as the port the container listens on.
EXPOSE 8000
# Startup command
COPY installer/run-maxkb.sh /usr/bin/
COPY build/run-maxkb.sh /usr/bin/
RUN chmod 755 /usr/bin/run-maxkb.sh
# ENTRYPOINT and CMD combine to: bash -c /usr/bin/run-maxkb.sh
ENTRYPOINT ["bash", "-c"]
CMD [ "/usr/bin/run-maxkb.sh" ]

View File

@ -1,69 +0,0 @@
# coding=utf-8
"""
@project: maxkb
@Author
@file install_model.py
@date2023/12/18 14:02
@desc:
"""
import json
import os.path
from pycrawlers import huggingface
from transformers import GPT2TokenizerFast
# pycrawlers helper; its get_batch_data method downloads the embedding model below.
hg = huggingface()
# Root directory (relative to the working directory) for every download.
prefix_dir = "./model"

# GPT-2 tokenizer variants, all fetched into the same hub cache directory.
_tokenizer_names = ('gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl', 'distilgpt2')

# Each entry pairs a callable with the keyword arguments it is invoked with.
model_config = [
    {
        'download_params': {
            'cache_dir': os.path.join(prefix_dir, 'base/hub'),
            'pretrained_model_name_or_path': tokenizer_name
        },
        'download_function': GPT2TokenizerFast.from_pretrained
    }
    for tokenizer_name in _tokenizer_names
]
# The embedding model is downloaded from its repository file listing.
model_config.append(
    {
        'download_params': {
            'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"],
            'file_save_paths': [os.path.join(prefix_dir, 'embedding', "shibing624_text2vec-base-chinese")]
        },
        'download_function': hg.get_batch_data
    }
)
def install():
    """Run every download described in ``model_config``.

    For each entry the keyword arguments are printed as JSON and then passed
    to the entry's download callable.
    """
    for entry in model_config:
        params = entry.get('download_params')
        print(json.dumps(params))
        download = entry.get('download_function')
        download(**params)
if __name__ == '__main__':
install()

View File

@ -1,20 +0,0 @@
# Email settings
# NOTE(review): the ${...} placeholders are presumably substituted from
# environment variables of the same name - confirm against the config loader.
EMAIL_ADDRESS: ${EMAIL_ADDRESS}
EMAIL_USE_TLS: ${EMAIL_USE_TLS}
EMAIL_USE_SSL: ${EMAIL_USE_SSL}
EMAIL_HOST: ${EMAIL_HOST}
EMAIL_PORT: ${EMAIL_PORT}
EMAIL_HOST_USER: ${EMAIL_HOST_USER}
EMAIL_HOST_PASSWORD: ${EMAIL_HOST_PASSWORD}
#
# Database connection settings
DB_NAME: maxkb
DB_HOST: 127.0.0.1
DB_PORT: 5432
DB_USER: root
DB_PASSWORD: Password123@postgres
DB_ENGINE: django.db.backends.postgresql_psycopg2
# Directory holding embedding models, and the model used by name.
EMBEDDING_MODEL_PATH: /opt/maxkb/model/embedding
EMBEDDING_MODEL_NAME: /opt/maxkb/model/embedding/shibing624_text2vec-base-chinese
DEBUG: false

View File

@ -1,5 +0,0 @@
-- Create the application database.
CREATE DATABASE "maxkb";
-- Switch the psql session to the new database so the extension is created there.
\c "maxkb";
-- Enable the vector extension, pinned to version 0.5.1.
CREATE EXTENSION "vector" VERSION '0.5.1';

View File

@ -1,16 +0,0 @@
#!/bin/bash
# Launch PostgreSQL in the background through docker-entrypoint.sh.
docker-entrypoint.sh postgres &

# Poll once per second until PostgreSQL accepts connections on 127.0.0.1.
while ! pg_isready --host=127.0.0.1
do
    sleep 1 && echo "waiting for postgres"
done

# Launch MaxKB in the background.
python /opt/maxkb/app/main.py start &

# Block until the first of the background jobs terminates ...
wait -n
# ... and exit with that job's status.
exit $?