初始化项目,由ModelHub XC社区提供模型

Model: AI-ModelScope/dolphin-2.6-mistral-7b
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-15 06:03:59 +08:00
commit 6206fe914d
23 changed files with 132996 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
from transformers import AutoTokenizer
# Rebuild the tokenizer so that "<|im_end|>" takes over id 2 from "</s>" and
# serves as both the end-of-sequence and the padding token, then write the
# result to a new directory.
source_dir = "/workspace/dolphin-2.6-mistral-7b-hf"
base_tokenizer = AutoTokenizer.from_pretrained(source_dir)

# Step 1: fetch the token -> id mapping, drop "</s>" and register
# "<|im_end|>" under id 2. A missing "</s>" entry raises KeyError.
token_to_id = base_tokenizer.get_vocab()
token_to_id.pop("</s>")
token_to_id["<|im_end|>"] = 2

# Step 2: load the tokenizer again from the same directory, handing the
# edited mapping over through the `vocab` keyword.
# NOTE(review): whether `from_pretrained` actually applies a `vocab` keyword
# depends on the tokenizer class and transformers version -- confirm that the
# saved tokenizer really maps "<|im_end|>" to id 2.
tokenizer = AutoTokenizer.from_pretrained(source_dir, vocab=token_to_id)

# Step 3: use "<|im_end|>" as the EOS token first, then as the PAD token.
for special_attr in ("eos_token", "pad_token"):
    setattr(tokenizer, special_attr, "<|im_end|>")

# Step 4: persist the modified tokenizer.
tokenizer.save_pretrained('/workspace/dolphin-new-tokenizer/')