add deepseekv3 and llama4

This commit is contained in:
Chranos
2026-02-11 15:17:07 +08:00
parent 86fd3b5a92
commit 2ad23aa8da

View File

@@ -176,14 +176,14 @@ def vllm__llama4__Llama4Attention__forward(
================== ==================
''' '''
# QK norm (lesson #2: use contiguous + reshape) # QK norm (MLU fused_rms_norm requires matching dtypes, skip .float())
if self.qk_norm is not None: if self.qk_norm is not None:
q = q.contiguous().reshape(-1, self.head_dim) q = q.contiguous().reshape(-1, self.head_dim)
q = (self.qk_norm(q.float()) q = (self.qk_norm(q)
.contiguous().reshape(-1, self.q_size).to(q.dtype)) .contiguous().reshape(-1, self.q_size))
k = k.contiguous().reshape(-1, self.head_dim) k = k.contiguous().reshape(-1, self.head_dim)
k = (self.qk_norm(k.float()) k = (self.qk_norm(k)
.contiguous().reshape(-1, self.kv_size).to(k.dtype)) .contiguous().reshape(-1, self.kv_size))
# Temperature tuning for NoPE layers # Temperature tuning for NoPE layers
if self.attn_temperature_tuning and self.nope: if self.attn_temperature_tuning and self.nope: