add deepseekv3 and llama4

This commit is contained in:
Chranos
2026-02-11 15:17:07 +08:00
parent 86fd3b5a92
commit 2ad23aa8da

View File

@@ -176,14 +176,14 @@ def vllm__llama4__Llama4Attention__forward(
==================
'''
# QK norm (lesson #2: use contiguous + reshape)
# QK norm (MLU fused_rms_norm requires matching dtypes, skip .float())
if self.qk_norm is not None:
q = q.contiguous().reshape(-1, self.head_dim)
q = (self.qk_norm(q.float())
.contiguous().reshape(-1, self.q_size).to(q.dtype))
q = (self.qk_norm(q)
.contiguous().reshape(-1, self.q_size))
k = k.contiguous().reshape(-1, self.head_dim)
k = (self.qk_norm(k.float())
.contiguous().reshape(-1, self.kv_size).to(k.dtype))
k = (self.qk_norm(k)
.contiguous().reshape(-1, self.kv_size))
# Temperature tuning for NoPE layers
if self.attn_temperature_tuning and self.nope: