add qwen3_moe: pass concatenated q/k tensor to MLU rotary_emb

2026-02-10 18:22:13 +08:00
parent efbb06147a
commit fa0219fbf8
1 changed file with 7 additions and 5 deletions
--- a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py
@@ -258,11 +258,13 @@ class Qwen3MoeAttention(nn.Module):
        k_by_head = self.k_norm(k_by_head)
        k = k_by_head.reshape(k_shape)

-        # MLU's forward_mlu signature is (positions, x, offsets=None),
-        # so we must call separately for q and k to avoid k being
-        # treated as offsets.
-        q = self.rotary_emb(positions, q)
-        k = self.rotary_emb(positions, k)
+        # forward_mlu (unlike forward_native) takes one concatenated 3D qk
+        # tensor, not separate q and k; it is relied on to rotate qk in place.
+        qk = torch.cat([q, k], dim=-1)
+        self.rotary_emb(positions,
+                        qk.view(-1, self.num_heads + self.num_kv_heads,
+                                self.head_dim))
+        q, k = qk.split([self.q_size, self.kv_size], dim=-1)
        attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
        output, _ = self.o_proj(attn_output)
        return output