From cf92e956885a074c7ea1117b0d0fc7f50c50b38c Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Tue, 10 Feb 2026 18:09:58 +0800
Subject: [PATCH] qwen3_moe: make QK-norm inputs contiguous for MLU RMSNorm

---
 vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py
index 787d072..6445351 100644
--- a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py
@@ -244,15 +244,19 @@ class Qwen3MoeAttention(nn.Module):
                              dim=-1)
 
         # Qwen3 specific: Apply QK normalization before rotary embedding
+        # Make the per-head tensors contiguous: MLU's RMSNorm uses .view()
+        # internally, which requires a compatible (dense) memory layout.
+        q_shape = q.shape
         q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim,
-                           self.head_dim)
+                           self.head_dim).contiguous()
         q_by_head = self.q_norm(q_by_head)
-        q = q_by_head.view(q.shape)
+        q = q_by_head.reshape(q_shape)
 
+        k_shape = k.shape
         k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim,
-                           self.head_dim)
+                           self.head_dim).contiguous()
         k_by_head = self.k_norm(k_by_head)
-        k = k_by_head.view(k.shape)
+        k = k_by_head.reshape(k_shape)
 
         q, k = self.rotary_emb(positions, q, k)
         attn_output = self.attn(q, k, v, kv_cache, attn_metadata)