diff --git a/.github/workflows/build-and-push-vector-model.yml b/.github/workflows/build-and-push-vector-model.yml
index 5f3f0fab2..f783f5cc8 100644
--- a/.github/workflows/build-and-push-vector-model.yml
+++ b/.github/workflows/build-and-push-vector-model.yml
@@ -65,4 +65,5 @@ jobs:
           password: ${{ secrets.GH_TOKEN }}
       - name: Docker Buildx (build-and-push)
         run: |
+          rm -f .dockerignore
           docker buildx build --output "type=image,push=true" ${{ steps.prepare.outputs.buildx_args }} -f installer/Dockerfile-vector-model
\ No newline at end of file
diff --git a/installer/install_model.py b/installer/install_model.py
new file mode 100644
index 000000000..85cd96a19
--- /dev/null
+++ b/installer/install_model.py
@@ -0,0 +1,56 @@
+# coding=utf-8
+"""
+    @project: maxkb
+    @Author:虎
+    @file: install_model.py
+    @date:2023/12/18 14:02
+    @desc: Fetch the GPT-2 tokenizers and the text2vec embedding model into ./model
+"""
+import json
+import os.path
+
+from pycrawlers import huggingface
+from transformers import GPT2TokenizerFast
+
+hg = huggingface()
+prefix_dir = "./model"
+
+# All GPT-2 tokenizer variants share one cache directory.
+tokenizer_cache_dir = os.path.join(prefix_dir, 'base/hub')
+tokenizer_names = ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl', 'distilgpt2']
+
+# Each entry pairs a download callable with the keyword arguments passed to it.
+model_config = [
+    {
+        'download_params': {
+            'cache_dir': tokenizer_cache_dir,
+            'pretrained_model_name_or_path': tokenizer_name
+        },
+        'download_function': GPT2TokenizerFast.from_pretrained
+    }
+    for tokenizer_name in tokenizer_names
+] + [
+    {
+        'download_params': {
+            'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"],
+            'file_save_paths': [os.path.join(prefix_dir, 'embedding', "shibing624_text2vec-base-chinese")]
+        },
+        'download_function': hg.get_batch_data
+    }
+]
+
+
+def install():
+    """Run every download listed in model_config, in order.
+
+    Prints each entry's download parameters as JSON, then calls the entry's
+    download function with those parameters as keyword arguments.
+    """
+    for model in model_config:
+        download_params = model['download_params']
+        print(json.dumps(download_params))
+        model['download_function'](**download_params)
+
+
+if __name__ == '__main__':
+    install()