Add PDL support for quant kernel and rope kernel (#9106)

This commit is contained in:
fzyzcjy
2025-08-20 16:56:29 +08:00
committed by GitHub
parent c9bf3877a0
commit 42c8704560
7 changed files with 80 additions and 33 deletions

View File

@@ -90,7 +90,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def(
"apply_rope_pos_ids_cos_sin_cache(Tensor q, Tensor k, Tensor! q_rope, Tensor! k_rope, Tensor cos_sin_cache, "
-"Tensor pos_ids, bool interleave, int cuda_stream, "
+"Tensor pos_ids, bool interleave, bool enable_pdl, int cuda_stream, "
"Tensor? v, Tensor!? k_buffer, Tensor!? v_buffer, Tensor? kv_cache_loc) -> ()");
m.impl("apply_rope_pos_ids_cos_sin_cache", torch::kCUDA, &apply_rope_pos_ids_cos_sin_cache);