Add fp8 qkv_proj_with_rope kernel for CPU in sgl-kernel and add unit tests (#6493)

2025-05-23 15:14:46 +08:00
parent 4685fbb888
commit 4ba1eea83f
5 changed files with 483 additions and 11 deletions
--- a/test/srt/cpu/test_decode.py
+++ b/test/srt/cpu/test_decode.py
@@ -1,7 +1,7 @@
 import unittest

+import sgl_kernel
 import torch
-from sgl_kernel.common_ops import decode_attention_cpu as decode_attention
 from torch.nn.functional import scaled_dot_product_attention

 from sglang.test.test_utils import CustomTestCase
@@ -105,7 +105,7 @@ class TestDecodeAttention(CustomTestCase):
        v_buffer = v_buffer.transpose(0, 1).contiguous().transpose(0, 1)
        key = key.transpose(0, 1).contiguous().transpose(0, 1)
        value = value.transpose(0, 1).contiguous().transpose(0, 1)
-        decode_attention(
+        torch.ops.sgl_kernel.decode_attention_cpu(
            q,
            k_buffer,
            v_buffer,