Fix incorrect KV cache allocation for MTP models. (#8482)

Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
This commit is contained in:
Lifu Huang
2025-07-28 22:54:50 -07:00
committed by GitHub
parent 0ce84c822b
commit fb16fbaf52
2 changed files with 18 additions and 13 deletions

View File

@@ -261,6 +261,9 @@ class ModelConfig:
self.num_key_value_heads = self.num_attention_heads
self.hidden_size = self.hf_text_config.hidden_size
self.num_hidden_layers = self.hf_text_config.num_hidden_layers
self.num_nextn_predict_layers = getattr(
self.hf_text_config, "num_nextn_predict_layers", None
)
self.vocab_size = self.hf_text_config.vocab_size
# Verify quantization