Fix config for vLLM v2 compatibility: use_cache=true, rope_scaling format

Parker Sytz
2026-03-21 12:24:51 +00:00
committed by system
parent 987520d420
commit 07f90ba4c7

@@ -1,37 +1,35 @@
 {
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
   "torch_dtype": "bfloat16",
   "eos_token_id": 128009,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 131072,
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pad_token_id": 128004,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
-  "rope_parameters": {
-    "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_theta": 500000.0,
-    "rope_type": "llama3"
-  },
   "tie_word_embeddings": false,
-  "unsloth_fixed": true,
-  "unsloth_version": "2026.3.8",
-  "use_cache": false,
-  "vocab_size": 128256
+  "use_cache": true,
+  "vocab_size": 128256,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_theta": 500000.0,
+    "rope_type": "llama3"
+  }
 }
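
A quick way to sanity-check the rewritten config before pointing vLLM at the model directory is a small standalone script like the sketch below. The "config.json" path is an assumption about your local checkout; the field values are taken directly from this diff.

# Minimal sanity check for the updated config (sketch; assumes the file is
# saved as "config.json" in the current directory -- adjust the path as needed).
import json

with open("config.json") as f:
    cfg = json.load(f)

# The KV cache must be enabled for generation.
assert cfg["use_cache"] is True, "use_cache should be true after this commit"

# The llama3 RoPE scaling block now lives under "rope_scaling",
# not the old "rope_parameters" key.
assert "rope_parameters" not in cfg, "stale rope_parameters key still present"
rope = cfg["rope_scaling"]
assert rope["rope_type"] == "llama3"
assert rope["factor"] == 8.0
assert rope["original_max_position_embeddings"] == 8192

print("config.json matches the expected post-commit layout")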