Add fp8 qkv_proj_with_rope kernel for CPU in sgl-kernel and add UT (#6493)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import unittest
|
||||
|
||||
+import sgl_kernel
|
||||
import torch
|
||||
-from sgl_kernel.common_ops import decode_attention_cpu as decode_attention
|
||||
from torch.nn.functional import scaled_dot_product_attention
|
||||
|
||||
from sglang.test.test_utils import CustomTestCase
|
||||
@@ -105,7 +105,7 @@ class TestDecodeAttention(CustomTestCase):
|
||||
v_buffer = v_buffer.transpose(0, 1).contiguous().transpose(0, 1)
|
||||
key = key.transpose(0, 1).contiguous().transpose(0, 1)
|
||||
value = value.transpose(0, 1).contiguous().transpose(0, 1)
|
||||
-decode_attention(
|
||||
+torch.ops.sgl_kernel.decode_attention_cpu(
|
||||
q,
|
||||
k_buffer,
|
||||
v_buffer,
|
||||
|
||||
Reference in New Issue
Block a user