From 014cab4dd23eb637f18d887f767a1791db1ae8ac Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sun, 9 Feb 2025 20:18:44 +0800 Subject: [PATCH] update forward_return_lse (#3425) --- python/sglang/srt/layers/attention/flashinfer_backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/layers/attention/flashinfer_backend.py b/python/sglang/srt/layers/attention/flashinfer_backend.py index 5e8879b1d..bbe9a2e1a 100644 --- a/python/sglang/srt/layers/attention/flashinfer_backend.py +++ b/python/sglang/srt/layers/attention/flashinfer_backend.py @@ -409,9 +409,9 @@ class FlashInferAttnBackend(AttentionBackend): ) else: o1, s1 = self.prefill_wrapper_ragged.forward_return_lse( - q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim), - k.contiguous().view(-1, layer.tp_k_head_num, layer.head_dim), - v.contiguous().view(-1, layer.tp_v_head_num, layer.head_dim), + q.view(-1, layer.tp_q_head_num, layer.head_dim), + k.view(-1, layer.tp_k_head_num, layer.head_dim), + v.view(-1, layer.tp_v_head_num, layer.head_dim), causal=True, sm_scale=layer.scaling, logits_soft_cap=logits_soft_cap,