add qwen3_moe
This commit is contained in:
@@ -258,7 +258,11 @@ class Qwen3MoeAttention(nn.Module):
|
|||||||
k_by_head = self.k_norm(k_by_head)
|
k_by_head = self.k_norm(k_by_head)
|
||||||
k = k_by_head.reshape(k_shape)
|
k = k_by_head.reshape(k_shape)
|
||||||
|
|
||||||
q, k = self.rotary_emb(positions, q, k)
|
# MLU's forward_mlu signature is (positions, x, offsets=None),
|
||||||
|
# so we must call separately for q and k to avoid k being
|
||||||
|
# treated as offsets.
|
||||||
|
q = self.rotary_emb(positions, q)
|
||||||
|
k = self.rotary_emb(positions, k)
|
||||||
attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
|
attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
|
||||||
output, _ = self.o_proj(attn_output)
|
output, _ = self.o_proj(attn_output)
|
||||||
return output
|
return output
|
||||||
|
|||||||
Reference in New Issue
Block a user