diff --git a/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py b/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py index 4812a90..63765d0 100644 --- a/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py +++ b/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py @@ -248,6 +248,21 @@ class Base(nn.Module): # Set attention implementation to vLLM's self.text_config._attn_implementation = "vllm" + # Ensure head_dim is correctly set in config + # Some models may have incorrect head_dim, so we compute and set it + if hasattr(self.text_config, "num_attention_heads") and hasattr(self.text_config, "hidden_size"): + correct_head_dim = self.text_config.hidden_size // self.text_config.num_attention_heads + if hasattr(self.text_config, "head_dim"): + if self.text_config.head_dim != correct_head_dim: + logger.warning( + "Correcting head_dim in config: %d -> %d", + self.text_config.head_dim, correct_head_dim + ) + self.text_config.head_dim = correct_head_dim + else: + # Set head_dim if not present, some models need it + self.text_config.head_dim = correct_head_dim + with init_on_device_without_buffers("meta"): self.model: "PreTrainedModel" = AutoModel.from_config( self.config,