refactor: add cl100k_base.tiktoken tokenizer.

This commit is contained in:
liqiang-fit2cloud 2025-11-07 15:34:36 +08:00
parent f9c1742b43
commit f6c70da5ff
3 changed files with 6 additions and 3 deletions

View File

@ -5,7 +5,7 @@ on:
# workflow_dispatch inputs for the Docker build workflow.
# NOTE(review): this is a diff hunk with +/- markers stripped — the two
# `default:` lines below are the old (v2.0.2) and new (v2.0.3) sides of the
# image-tag bump, not a duplicate key in the live file.
inputs:
dockerImageTag:
description: 'Docker Image Tag'
default: 'v2.0.2'
default: 'v2.0.3'
required: true
architecture:
description: 'Architecture'

View File

@ -1,7 +1,7 @@
# Build stages for the MaxKB Postgres image: a python venv builder stage and
# the pre-built vector-model image whose assets are copied in later.
# NOTE(review): diff hunk with +/- markers stripped — the two
# maxkb-vector-model FROM lines are the old (v2.0.2) / new (v2.0.3) sides of
# the version bump, not two stages with the same alias.
FROM python:3.11-slim-trixie AS python-stage
RUN python3 -m venv /opt/py3
FROM ghcr.io/1panel-dev/maxkb-vector-model:v2.0.2 AS vector-model
FROM ghcr.io/1panel-dev/maxkb-vector-model:v2.0.3 AS vector-model
# Final stage: Debian-trixie-based Postgres with the python toolchain overlaid.
FROM postgres:17.6-trixie
COPY --from=python-stage /usr/local /usr/local

View File

@ -25,7 +25,10 @@ COPY --from=vector-model /opt/maxkb/app/model /opt/maxkb-app/model
# Copy bundled model/tokenizer assets from earlier stages into the app tree.
COPY --from=vector-model /opt/maxkb/app/model/base/hub /opt/maxkb-app/model/tokenizer
COPY --from=tmp-stage1 model/tokenizer /opt/maxkb-app/model/tokenizer
# Drop ONNX weights that are not needed at runtime to shrink the image.
# (rm in a separate layer only hides files if this is NOT the final squash —
# the bytes from the earlier COPY layer still ship; acceptable only because
# a later `FROM scratch` restart discards these layers — TODO confirm.)
RUN rm -rf /opt/maxkb-app/model/embedding/shibing624_text2vec-base-chinese/onnx
# Fetch OpenAI's cl100k_base tiktoken vocabulary into the tokenizer dir so it
# is available offline at runtime.
# NOTE(review): `apk` exists only on Alpine bases — confirm this stage's FROM
# (outside this hunk) is Alpine; on a Debian/trixie base this must be apt-get.
# NOTE(review): download is not checksum-verified and the URL is unpinned;
# consider BuildKit `ADD --checksum=sha256:...` for reproducible builds.
# `mkdir` uses a relative path — depends on the current WORKDIR (not visible
# in this hunk); verify it is writable and intentional.
RUN apk add --update --no-cache curl && \
mkdir -p openai-tiktoken-cl100k-base && \
curl -Lf https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken > openai-tiktoken-cl100k-base/cl100k_base.tiktoken && \
mv -f openai-tiktoken-cl100k-base /opt/maxkb-app/model/tokenizer/
# Start of the next (final) stage; its contents are outside this hunk.
FROM scratch