forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3_moe
This commit is contained in:
@@ -258,11 +258,13 @@ class Qwen3MoeAttention(nn.Module):
|
||||
k_by_head = self.k_norm(k_by_head)
|
||||
k = k_by_head.reshape(k_shape)
|
||||
|
||||
# MLU's forward_mlu signature is (positions, x, offsets=None),
|
||||
# so we must call separately for q and k to avoid k being
|
||||
# treated as offsets.
|
||||
q = self.rotary_emb(positions, q)
|
||||
k = self.rotary_emb(positions, k)
|
||||
# MLU rotary_emb expects a single concatenated 3D tensor, not
|
||||
# separate q and k (forward_mlu signature differs from forward_native).
|
||||
qk = torch.cat([q, k], dim=-1)
|
||||
self.rotary_emb(positions,
|
||||
qk.view(-1, self.num_heads + self.num_kv_heads,
|
||||
self.head_dim))
|
||||
q, k = qk.split([self.q_size, self.kv_size], dim=-1)
|
||||
attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
|
||||
output, _ = self.o_proj(attn_output)
|
||||
return output
|
||||
|
||||
Reference in New Issue
Block a user