From cf92e956885a074c7ea1117b0d0fc7f50c50b38c Mon Sep 17 00:00:00 2001 From: Chranos <826995883@qq.com> Date: Tue, 10 Feb 2026 18:09:58 +0800 Subject: [PATCH] add qwen3_moe --- vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py index 787d072..6445351 100644 --- a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py +++ b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py @@ -244,15 +244,19 @@ class Qwen3MoeAttention(nn.Module): dim=-1) # Qwen3 specific: Apply QK normalization before rotary embedding + # Use .contiguous() to ensure memory layout is compatible with + # MLU's RMSNorm which uses .view() internally. + q_shape = q.shape q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, - self.head_dim) + self.head_dim).contiguous() q_by_head = self.q_norm(q_by_head) - q = q_by_head.view(q.shape) + q = q_by_head.reshape(q_shape) + k_shape = k.shape k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, - self.head_dim) + self.head_dim).contiguous() k_by_head = self.k_norm(k_by_head) - k = k_by_head.view(k.shape) + k = k_by_head.reshape(k_shape) q, k = self.rotary_emb(positions, q, k) attn_output = self.attn(q, k, v, kv_cache, attn_metadata)