Organize Attention Backends (#1547)

2024-09-30 15:54:18 -07:00
parent 0f4fb19bc8
commit 99ec439da4
12 changed files with 229 additions and 205 deletions
--- a/test/srt/test_create_kvindices.py
+++ b/test/srt/test_create_kvindices.py
@@ -4,7 +4,9 @@ import unittest
 import numpy as np
 import torch

-from sglang.srt.layers.flashinfer_utils import create_flashinfer_kv_indices_triton
+from sglang.srt.layers.attention.flashinfer_utils import (
+    create_flashinfer_kv_indices_triton,
+)


 class TestCreateKvIndices(unittest.TestCase):
--- a/test/srt/test_triton_attention_kernels.py
+++ b/test/srt/test_triton_attention_kernels.py
@@ -3,12 +3,14 @@ import unittest

 import torch

-from sglang.srt.layers.triton_attention.decode_attention import decode_attention_fwd
-from sglang.srt.layers.triton_attention.extend_attention import (
+from sglang.srt.layers.attention.triton_ops.decode_attention import decode_attention_fwd
+from sglang.srt.layers.attention.triton_ops.extend_attention import (
    extend_attention_fwd,
    redundant_attention,
 )
-from sglang.srt.layers.triton_attention.prefill_attention import context_attention_fwd
+from sglang.srt.layers.attention.triton_ops.prefill_attention import (
+    context_attention_fwd,
+)


 class TestExtendAttention(unittest.TestCase):