# MaxKB/installer/Dockerfile-vector-model
# Viewer metadata at capture time: 2025-11-07 15:34:36 +08:00, 35 lines, 1.6 KiB, Plaintext

#FROM python:3.11-slim-bookworm AS vector-model
#COPY installer/install_model.py install_model.py
#RUN pip3 install --upgrade pip setuptools && \
# pip install pycrawlers && \
# pip install transformers && \
# python3 install_model.py && \
# cp -r model/base/hub model/tokenizer
#FROM scratch
#COPY --from=vector-model model /opt/maxkb-app/model
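# For reasons not yet understood, re-pulling the vector model with the script above produces a much larger model than before, so the recipe below reuses the previously built vector-model image instead.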
# Throwaway stage: runs the bundled installer script and leaves its output
# under model/, from which a later stage copies model/tokenizer.
# NOTE(review): the script name suggests it fetches bert-base-cased assets — confirm.
FROM python:3.11-slim-bookworm AS tmp-stage1
COPY installer/install_model_bert_base_cased.py install_model_bert_base_cased.py
# Upgrade the packaging tools, install pycrawlers and transformers (presumably
# what the script imports), run the script, then copy model/base/hub to
# model/tokenizer.
RUN pip3 install --upgrade pip setuptools \
    && pip install pycrawlers \
    && pip install transformers \
    && python3 install_model_bert_base_cased.py \
    && cp -r model/base/hub model/tokenizer
# Prebuilt vector-model image; referenced only as a COPY --from source below.
FROM ghcr.io/1panel-dev/maxkb-vector-model:v1.0.1 AS vector-model

# Assembly stage: merges the prebuilt model tree, the tokenizer files produced
# by tmp-stage1, and the cl100k_base tiktoken encoding file into
# /opt/maxkb-app/model. The alpine tag is pinned so rebuilds do not silently
# pick up whatever "latest" points at.
FROM alpine:3.21 AS tmp-stage2
COPY --from=vector-model /opt/maxkb/app/model /opt/maxkb-app/model
COPY --from=vector-model /opt/maxkb/app/model/base/hub /opt/maxkb-app/model/tokenizer
COPY --from=tmp-stage1 model/tokenizer /opt/maxkb-app/model/tokenizer
# Remove the onnx directory of the text2vec embedding model.
# NOTE(review): presumably done to shrink the shipped model tree — confirm.
RUN rm -rf /opt/maxkb-app/model/embedding/shibing624_text2vec-base-chinese/onnx
# Download the encoding file straight into its final location. -f makes curl
# exit non-zero on HTTP errors so the build fails instead of keeping an error
# page; -L follows redirects.
RUN apk add --no-cache curl && \
    mkdir -p /opt/maxkb-app/model/tokenizer/openai-tiktoken-cl100k-base && \
    curl -fL \
        -o /opt/maxkb-app/model/tokenizer/openai-tiktoken-cl100k-base/cl100k_base.tiktoken \
        https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken
# Final image: starts from scratch and contains only the assembled model tree
# copied out of tmp-stage2 (no shell, package manager, or build tooling).
FROM scratch
COPY --from=tmp-stage2 /opt/maxkb-app/model /opt/maxkb-app/model