llama : add Minerva 7B model support (#10673)
* Support for Minerva 7B * Update convert_hf_to_gguf_update.py
This commit is contained in:
@@ -418,6 +418,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
||||
case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
|
||||
case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
|
||||
case LLAMA_VOCAB_PRE_TYPE_EXAONE:
|
||||
case LLAMA_VOCAB_PRE_TYPE_MINERVA:
|
||||
regex_exprs = {
|
||||
"\\p{N}",
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
|
||||
@@ -6479,6 +6479,9 @@ static void llm_load_vocab(
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_CHAMELEON;
|
||||
vocab.tokenizer_add_bos = true;
|
||||
vocab.tokenizer_clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "minerva-7b") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MINERVA;
|
||||
} else {
|
||||
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user