Organize Attention Backends (#1547)

This commit is contained in:
Liangsheng Yin
2024-09-30 15:54:18 -07:00
committed by GitHub
parent 0f4fb19bc8
commit 99ec439da4
12 changed files with 229 additions and 205 deletions

View File

@@ -4,7 +4,9 @@ import unittest
 import numpy as np
 import torch
-from sglang.srt.layers.flashinfer_utils import create_flashinfer_kv_indices_triton
+from sglang.srt.layers.attention.flashinfer_utils import (
+    create_flashinfer_kv_indices_triton,
+)
 class TestCreateKvIndices(unittest.TestCase):

View File

@@ -3,12 +3,14 @@ import unittest
 import torch
-from sglang.srt.layers.triton_attention.decode_attention import decode_attention_fwd
-from sglang.srt.layers.triton_attention.extend_attention import (
+from sglang.srt.layers.attention.triton_ops.decode_attention import decode_attention_fwd
+from sglang.srt.layers.attention.triton_ops.extend_attention import (
     extend_attention_fwd,
     redundant_attention,
 )
-from sglang.srt.layers.triton_attention.prefill_attention import context_attention_fwd
+from sglang.srt.layers.attention.triton_ops.prefill_attention import (
+    context_attention_fwd,
+)
 class TestExtendAttention(unittest.TestCase):