mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
refactor: change model path in MKTokenizer.
This commit is contained in:
parent
f457588cd5
commit
f9c1742b43
|
|
@ -6,11 +6,8 @@
|
|||
@date:2024/4/28 10:17
|
||||
@desc:
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||
|
||||
|
||||
class MKTokenizer:
|
||||
def __init__(self, tokenizer):
|
||||
|
|
@ -27,6 +24,7 @@ class TokenizerManage:
|
|||
def get_tokenizer():
|
||||
from tokenizers import Tokenizer
|
||||
# 创建Tokenizer
|
||||
s = os.path.join(BASE_DIR.parent, 'tokenizer', 'bert-base-cased', 'tokenizer.json')
|
||||
TokenizerManage.tokenizer = Tokenizer.from_file(s)
|
||||
model_path = os.path.join("/opt/maxkb-app", "model", "tokenizer", "models--bert-base-cased")
|
||||
with open(f"{model_path}/refs/main", encoding="utf-8") as f: snapshot = f.read()
|
||||
TokenizerManage.tokenizer = Tokenizer.from_file(f"{model_path}/snapshots/{snapshot}/tokenizer.json")
|
||||
return MKTokenizer(TokenizerManage.tokenizer)
|
||||
|
|
|
|||
|
|
@ -1,23 +0,0 @@
|
|||
{
|
||||
"architectures": [
|
||||
"BertForMaskedLM"
|
||||
],
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"gradient_checkpointing": false,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-12,
|
||||
"max_position_embeddings": 512,
|
||||
"model_type": "bert",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 0,
|
||||
"position_embedding_type": "absolute",
|
||||
"transformers_version": "4.6.0.dev0",
|
||||
"type_vocab_size": 2,
|
||||
"use_cache": true,
|
||||
"vocab_size": 28996
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -1 +0,0 @@
|
|||
{"do_lower_case": false, "model_max_length": 512}
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue