Fix config for vLLM v2 compatibility: use_cache=true, rope_scaling format

Parker Sytz
2026-03-21 12:24:51 +00:00
committed by system
parent 987520d420
commit 07f90ba4c7

@@ -1,37 +1,35 @@
 {
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
   "torch_dtype": "bfloat16",
   "eos_token_id": 128009,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 131072,
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pad_token_id": 128004,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
-  "rope_parameters": {
-    "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_theta": 500000.0,
-    "rope_type": "llama3"
-  },
   "tie_word_embeddings": false,
-  "unsloth_fixed": true,
-  "unsloth_version": "2026.3.8",
-  "use_cache": false,
-  "vocab_size": 128256
+  "use_cache": true,
+  "vocab_size": 128256,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_theta": 500000.0,
+    "rope_type": "llama3"
+  }
 }
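
A quick way to sanity-check the rewritten config before pointing vLLM at the model directory is a small standalone script like the sketch below. The "config.json" path is an assumption about your local checkout; the field values are taken directly from this diff.

# Minimal sanity check for the updated config (sketch; assumes the file is
# saved as "config.json" in the current directory -- adjust the path as needed).
import json

with open("config.json") as f:
    cfg = json.load(f)

# The KV cache must be enabled for generation.
assert cfg["use_cache"] is True, "use_cache should be true after this commit"

# The llama3 RoPE scaling block now lives under "rope_scaling",
# not the old "rope_parameters" key.
assert "rope_parameters" not in cfg, "stale rope_parameters key still present"
rope = cfg["rope_scaling"]
assert rope["rope_type"] == "llama3"
assert rope["factor"] == 8.0
assert rope["original_max_position_embeddings"] == 8192

print("config.json matches the expected post-commit layout")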