初始化项目,由ModelHub XC社区提供模型
Model: AI-ModelScope/dolphin-2.6-mistral-7b Source: Original Platform
This commit is contained in:
19
configs/modify-tokenizer.py
Normal file
19
configs/modify-tokenizer.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
# Rewrites the tokenizer's end-of-sequence token: "</s>" is dropped from the
# token -> id mapping, "<|im_end|>" is mapped to id 2 in its place, and the
# result is saved to a new directory.

# Locations of the tokenizer to read and the directory to write.
SOURCE_DIR = "/workspace/dolphin-2.6-mistral-7b-hf"
OUTPUT_DIR = '/workspace/dolphin-new-tokenizer/'

# The token being retired, its replacement, and the id the replacement gets.
OLD_EOS_TOKEN = '</s>'
NEW_EOS_TOKEN = '<|im_end|>'
EOS_TOKEN_ID = 2

# First load: only used to obtain the current token -> id mapping.
tokenizer = AutoTokenizer.from_pretrained(SOURCE_DIR)

# Edit the mapping: drop "</s>" (raises KeyError if it is absent) and point
# "<|im_end|>" at id 2.
vocab = tokenizer.get_vocab()
del vocab[OLD_EOS_TOKEN]
vocab[NEW_EOS_TOKEN] = EOS_TOKEN_ID

# Second load: same directory, with the edited mapping passed as `vocab`.
# NOTE(review): whether the `vocab=` keyword actually replaces the loaded
# vocabulary depends on the tokenizer class that gets resolved -- confirm by
# inspecting the saved files.
tokenizer = AutoTokenizer.from_pretrained(SOURCE_DIR, vocab=vocab)

# Use "<|im_end|>" as both the end-of-sequence and the padding token
# (eos_token is assigned first, then pad_token).
tokenizer.eos_token = tokenizer.pad_token = NEW_EOS_TOKEN

# Write the modified tokenizer to its own directory.
tokenizer.save_pretrained(OUTPUT_DIR)
|
||||
Reference in New Issue
Block a user