Organize Attention Backends (#1547)

Liangsheng Yin
2024-09-30 15:54:18 -07:00
committed by GitHub
parent 0f4fb19bc8
commit 99ec439da4
12 changed files with 229 additions and 205 deletions


@@ -6,8 +6,8 @@ from flashinfer import (
 )
 from flashinfer.decode import _grouped_size_compiled_for_decode_kernels
-from sglang.srt.layers.token_attention import token_attention_fwd
-from sglang.srt.layers.triton_attention.extend_attention import (
+from sglang.srt.layers.attention.triton_ops.decode_attention import decode_attention_fwd
+from sglang.srt.layers.attention.triton_ops.extend_attention import (
     extend_attention_fwd,
     redundant_attention,
 )
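
The hunk above moves the Triton attention kernels under sglang.srt.layers.attention.triton_ops and renames the decode kernel from token_attention_fwd to decode_attention_fwd. For downstream code that must import against both the old and the new layout, a minimal compatibility shim could look like the sketch below; only the module paths visible in this diff are confirmed, everything else is an assumption.

    # Sketch of an import-compatibility shim; assumes only the paths
    # shown in the diff above.
    try:
        # New layout (this commit): Triton decode kernel lives under
        # sglang.srt.layers.attention.triton_ops.
        from sglang.srt.layers.attention.triton_ops.decode_attention import (
            decode_attention_fwd,
        )
    except ImportError:
        # Old layout: the same kernel was exported as token_attention_fwd.
        from sglang.srt.layers.token_attention import (
            token_attention_fwd as decode_attention_fwd,
        )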
@@ -159,7 +159,7 @@ def test_batch_decode_with_paged_kv_cache(
     b_seq_len = torch.full((batch_size,), kv_len, dtype=torch.int32).to(0)
     max_len_in_batch = kv_len
     other_kv_index = 0
-    token_attention_fwd(
+    decode_attention_fwd(
         q,
         k_buffer,
         v_buffer,
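
The call-site change in this test is a pure rename: the argument list (q, k_buffer, v_buffer, ...) is unchanged, only the function name differs. A hypothetical deprecation alias, not part of this commit, shows how external callers could keep the old name while delegating to the renamed kernel; only the rename itself is confirmed by the diff.

    # Hypothetical deprecation alias (not in this commit); assumes the
    # new import path shown above.
    import warnings

    from sglang.srt.layers.attention.triton_ops.decode_attention import (
        decode_attention_fwd,
    )

    def token_attention_fwd(*args, **kwargs):
        """Deprecated alias for decode_attention_fwd."""
        warnings.warn(
            "token_attention_fwd has been renamed to decode_attention_fwd",
            DeprecationWarning,
            stacklevel=2,
        )
        return decode_attention_fwd(*args, **kwargs)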