From 2a9f483af89f01a5eb913ddc3079ba075b19e06e Mon Sep 17 00:00:00 2001 From: Chranos <826995883@qq.com> Date: Tue, 10 Feb 2026 18:18:32 +0800 Subject: [PATCH] add qwen3_moe --- vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py index 6445351..26415a8 100644 --- a/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py +++ b/vllm-v0.6.2/vllm/model_executor/models/qwen3_moe.py @@ -258,7 +258,11 @@ class Qwen3MoeAttention(nn.Module): k_by_head = self.k_norm(k_by_head) k = k_by_head.reshape(k_shape) - q, k = self.rotary_emb(positions, q, k) + # MLU's forward_mlu signature is (positions, x, offsets=None), + # so we must call separately for q and k to avoid k being + # treated as offsets. + q = self.rotary_emb(positions, q) + k = self.rotary_emb(positions, k) attn_output = self.attn(q, k, v, kv_cache, attn_metadata) output, _ = self.o_proj(attn_output) return output