Fuse writing KV buffer into rope kernel (part 1: sgl-kernel) (#9077)

2025-08-12 16:46:40 +08:00
parent fcc11e5ed5
commit 9aea255522
11 changed files with 1152 additions and 194 deletions
--- a/sgl-kernel/include/sgl_kernel_ops.h
+++ b/sgl-kernel/include/sgl_kernel_ops.h
@@ -150,7 +150,11 @@ void apply_rope_pos_ids_cos_sin_cache(
    at::Tensor cos_sin_cache,
    at::Tensor pos_ids,
    bool interleave,
-    int64_t cuda_stream);
+    int64_t cuda_stream,
+    const std::optional<at::Tensor>& v,
+    const std::optional<at::Tensor>& k_buffer,
+    const std::optional<at::Tensor>& v_buffer,
+    const std::optional<at::Tensor>& kv_cache_loc);

 #ifdef USE_ROCM
 void gelu_quick(at::Tensor& out, const at::Tensor& input);