diff --git a/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py b/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py
index f61bd8f..9cdeb99 100644
--- a/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/transformers/base.py
@@ -222,6 +222,9 @@ class Base(nn.Module):
         # Fix attention head_dim in case config was incorrect
         self._fix_attention_head_dim()
 
+        # Add debug hook to first attention module to capture tensor shapes
+        self._add_attention_debug_hook()
+
         # Replace input embeddings
         self._replace_input_embeddings()
 
@@ -471,6 +474,36 @@ class Base(nn.Module):
         _recursive_replace(self.model, "model")
         logger.info("Replaced %d modules", replaced_count)
 
+    def _add_attention_debug_hook(self) -> None:
+        """Add a forward pre-hook to the first attention module for debugging."""
+        for name, module in self.model.named_modules():
+            if "Attention" in module.__class__.__name__:
+                def _debug_hook(mod, args, kwargs=None):
+                    hidden = args[0] if args else None
+                    if hidden is not None:
+                        logger.info("DEBUG HOOK: Attention input hidden_states.shape=%s", hidden.shape)
+                    # Print q_proj output shape
+                    q_proj = getattr(mod, 'q_proj', None)
+                    if q_proj is not None and hidden is not None:
+                        try:
+                            q_out = q_proj(hidden)
+                            logger.info("DEBUG HOOK: q_proj output shape=%s", q_out.shape)
+                            head_dim = getattr(mod, 'head_dim', 'NOT SET')
+                            num_heads = getattr(mod, 'num_heads', 'NOT SET')
+                            logger.info("DEBUG HOOK: Will try view with num_heads=%s, head_dim=%s",
+                                        num_heads, head_dim)
+                            if isinstance(head_dim, int) and isinstance(num_heads, int):
+                                expected = num_heads * head_dim
+                                actual = q_out.shape[-1]
+                                logger.info("DEBUG HOOK: q_proj output last dim=%d, expected (num_heads*head_dim)=%d, match=%s",
+                                            actual, expected, actual == expected)
+                        except Exception as e:
+                            logger.info("DEBUG HOOK: Error testing q_proj: %s", e)
+
+                module.register_forward_pre_hook(_debug_hook)
+                logger.info("DEBUG: Added debug hook to %s", name)
+                break
+
     def _fix_attention_head_dim(self) -> None:
         """
         Fix head_dim in attention modules and rotary embeddings after model creation.
@@ -696,19 +729,78 @@ class Base(nn.Module):
 
         # Forward through model
         # Note: return_dict=False returns tuple, first element is last hidden state
-        # DEBUG: Print attention module head_dim values just before forward
-        logger.info("DEBUG: Checking attention modules before forward...")
-        for name, module in self.model.named_modules():
-            if "Attention" in module.__class__.__name__:
-                head_dim = getattr(module, 'head_dim', 'NOT SET')
-                rotary_emb = getattr(module, 'rotary_emb', None)
-                if rotary_emb:
-                    emb_dim = getattr(rotary_emb, 'dim', 'N/A')
-                    logger.info("DEBUG: %s: head_dim=%s, rotary_emb.dim=%s",
-                                name, head_dim, emb_dim)
-                else:
-                    logger.info("DEBUG: %s: head_dim=%s, rotary_emb=None", name, head_dim)
-                break  # Just print first one
+        # DEBUG: Print detailed model structure info before forward
+        if not hasattr(self, '_debug_printed'):
+            self._debug_printed = True
+            logger.info("DEBUG: === Detailed model structure debug ===")
+
+            # Print transformers version
+            try:
+                import transformers
+                logger.info("DEBUG: transformers version: %s", transformers.__version__)
+            except Exception:
+                pass
+
+            # Print TP world size
+            logger.info("DEBUG: TP world_size=%d", self.tp_group.world_size)
+
+            # Print first attention module details
+            for name, module in self.model.named_modules():
+                if "Attention" in module.__class__.__name__:
+                    logger.info("DEBUG: First attention: %s (class=%s)", name, module.__class__.__name__)
+                    # Print all attributes
+                    for attr in ['head_dim', 'num_heads', 'num_key_value_heads',
+                                 'hidden_size', 'num_attention_heads',
+                                 'num_key_value_groups']:
+                        val = getattr(module, attr, 'NOT SET')
+                        logger.info("DEBUG: %s = %s", attr, val)
+
+                    # Print rotary_emb
+                    rotary = getattr(module, 'rotary_emb', None)
+                    if rotary:
+                        logger.info("DEBUG: rotary_emb: %s", type(rotary).__name__)
+                        if hasattr(rotary, 'inv_freq'):
+                            logger.info("DEBUG: rotary_emb.inv_freq.shape: %s", rotary.inv_freq.shape)
+                    else:
+                        logger.info("DEBUG: rotary_emb: None")
+
+                    # Print projection shapes
+                    for proj_name in ['q_proj', 'k_proj', 'v_proj', 'o_proj']:
+                        proj = getattr(module, proj_name, None)
+                        if proj:
+                            if hasattr(proj, 'weight'):
+                                logger.info("DEBUG: %s: type=%s, weight.shape=%s",
+                                            proj_name, type(proj).__name__,
+                                            proj.weight.shape if proj.weight is not None else 'None')
+                            elif hasattr(proj, 'output_size'):
+                                logger.info("DEBUG: %s: type=%s, in=%s, out=%s, out_per_part=%s",
+                                            proj_name, type(proj).__name__,
+                                            getattr(proj, 'input_size', 'N/A'),
+                                            getattr(proj, 'output_size', 'N/A'),
+                                            getattr(proj, 'output_size_per_partition', 'N/A'))
+                    break
+
+            # Print model-level rotary_emb
+            model_rotary = getattr(self.model, 'rotary_emb', None)
+            if model_rotary:
+                logger.info("DEBUG: Model-level rotary_emb: %s", type(model_rotary).__name__)
+                if hasattr(model_rotary, 'inv_freq'):
+                    logger.info("DEBUG: Model rotary_emb.inv_freq.shape: %s", model_rotary.inv_freq.shape)
+            else:
+                logger.info("DEBUG: No model-level rotary_emb")
+                # Check nested
+                for name, module in self.model.named_modules():
+                    if "RotaryEmbedding" in module.__class__.__name__:
+                        inv_freq_shape = module.inv_freq.shape if hasattr(module, 'inv_freq') else 'N/A'
+                        logger.info("DEBUG: Found rotary at %s: inv_freq.shape=%s", name, inv_freq_shape)
+                        break
+
+            # Print config details
+            for attr in ['head_dim', 'hidden_size', 'num_attention_heads', 'num_key_value_heads',
+                         'intermediate_size', 'num_hidden_layers']:
+                logger.info("DEBUG: config.%s = %s", attr, getattr(self.config, attr, 'NOT SET'))
+
+            logger.info("DEBUG: === End debug ===")
 
         with torch.no_grad():
             outputs = self.model(