diff --git a/vllm-v0.6.2/vllm/model_executor/models/registry.py b/vllm-v0.6.2/vllm/model_executor/models/registry.py
index 7be43c6..2fd3a37 100644
--- a/vllm-v0.6.2/vllm/model_executor/models/registry.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/registry.py
@@ -447,12 +447,18 @@ class _ModelRegistry:
             warn_on_fail=True,
         )
         if model_cls is not None:
-            logger.info(
-                "Found custom model class %s from auto_map[%s], "
-                "using TransformersForCausalLM wrapper",
-                model_cls.__name__,
-                name
-            )
+            # Only log once per model class to avoid spam
+            log_key = f"{model_cls.__name__}_{name}"
+            if not hasattr(self, '_logged_custom_models'):
+                self._logged_custom_models = set()
+            if log_key not in self._logged_custom_models:
+                logger.info(
+                    "Found custom model class %s from auto_map[%s], "
+                    "using TransformersForCausalLM wrapper",
+                    model_cls.__name__,
+                    name
+                )
+                self._logged_custom_models.add(log_key)
             # Return the wrapper architecture, not the actual class
             return "TransformersForCausalLM"
 
diff --git a/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py b/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py
index 63765d0..24b73eb 100644
--- a/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py
@@ -219,6 +219,9 @@ class Base(nn.Module):
         # Replace modules (with tensor parallel support)
         self._replace_modules()
 
+        # Fix attention head_dim in case config was incorrect
+        self._fix_attention_head_dim()
+
         # Replace input embeddings
         self._replace_input_embeddings()
 
@@ -248,20 +251,36 @@ class Base(nn.Module):
         # Set attention implementation to vLLM's
        self.text_config._attn_implementation = "vllm"
 
-        # Ensure head_dim is correctly set in config
+        # Ensure head_dim is correctly set in BOTH config and text_config
+        # Transformers models use config.head_dim to compute attention dimensions
         # Some models may have incorrect head_dim, so we compute and set it
         if hasattr(self.text_config, "num_attention_heads") and hasattr(self.text_config, "hidden_size"):
             correct_head_dim = self.text_config.hidden_size // self.text_config.num_attention_heads
+
+            # Check and fix head_dim in text_config
             if hasattr(self.text_config, "head_dim"):
                 if self.text_config.head_dim != correct_head_dim:
                     logger.warning(
-                        "Correcting head_dim in config: %d -> %d",
+                        "Correcting head_dim in text_config: %d -> %d",
                         self.text_config.head_dim, correct_head_dim
                     )
                     self.text_config.head_dim = correct_head_dim
             else:
-                # Set head_dim if not present, some models need it
                 self.text_config.head_dim = correct_head_dim
+
+            # Also set in self.config (which is passed to AutoModel.from_config)
+            if hasattr(self.config, "head_dim"):
+                if self.config.head_dim != correct_head_dim:
+                    logger.warning(
+                        "Correcting head_dim in config: %d -> %d",
+                        self.config.head_dim, correct_head_dim
+                    )
+                    self.config.head_dim = correct_head_dim
+            else:
+                self.config.head_dim = correct_head_dim
+
+            # Some models also need _attn_implementation in config
+            self.config._attn_implementation = "vllm"
 
         with init_on_device_without_buffers("meta"):
             self.model: "PreTrainedModel" = AutoModel.from_config(
@@ -435,6 +454,36 @@ class Base(nn.Module):
         _recursive_replace(self.model, "model")
         logger.info("Replaced %d modules", replaced_count)
 
+    def _fix_attention_head_dim(self) -> None:
+        """
+        Fix head_dim in attention modules after model creation.
+
+        Some models may have incorrect head_dim in config, which causes
+        Transformers attention modules to use wrong dimensions for RoPE.
+        This method corrects head_dim in all attention modules.
+        """
+        correct_head_dim = self.hidden_size // getattr(
+            self.text_config, "num_attention_heads", 32
+        )
+
+        fixed_count = 0
+        for name, module in self.model.named_modules():
+            # Check if this is an attention module
+            module_name = module.__class__.__name__
+            if "Attention" in module_name:
+                # Fix head_dim if it exists and is incorrect
+                if hasattr(module, "head_dim"):
+                    if module.head_dim != correct_head_dim:
+                        logger.debug(
+                            "Fixing head_dim in %s: %d -> %d",
+                            name, module.head_dim, correct_head_dim
+                        )
+                        module.head_dim = correct_head_dim
+                        fixed_count += 1
+
+        if fixed_count > 0:
+            logger.info("Fixed head_dim in %d attention modules", fixed_count)
+
     def _replace_input_embeddings(self) -> None:
         """Replace input embeddings with VocabParallelEmbedding."""
         input_embeddings = self.model.get_input_embeddings()