Fuse writing KV buffer into rope kernel (part 1: sgl-kernel) (#9077)

This commit is contained in:
fzyzcjy
2025-08-12 16:46:40 +08:00
committed by GitHub
parent fcc11e5ed5
commit 9aea255522
11 changed files with 1152 additions and 194 deletions

View File

@@ -150,7 +150,11 @@ void apply_rope_pos_ids_cos_sin_cache(
at::Tensor cos_sin_cache,
at::Tensor pos_ids,
bool interleave,
int64_t cuda_stream);
int64_t cuda_stream,
const std::optional<at::Tensor>& v,
const std::optional<at::Tensor>& k_buffer,
const std::optional<at::Tensor>& v_buffer,
const std::optional<at::Tensor>& kv_cache_loc);
#ifdef USE_ROCM
// Declared only when USE_ROCM is defined (see the #ifdef directly above).
// Takes `out` by mutable reference and `input` by const reference; returns nothing.
// NOTE(review): presumably writes a "quick GELU" activation of `input` into `out` —
// confirm against the definition, which is not visible here.
void gelu_quick(at::Tensor& out, const at::Tensor& input);