[DeepseekV32] Add fast_topk_transform_ragged_fused kernel (#11815)
Signed-off-by: Hao Lu <14827759+hlu1@users.noreply.github.com>
This commit is contained in:
@@ -113,6 +113,10 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
|
||||
"fast_topk_transform_fused(Tensor score, Tensor lengths, Tensor dst_page_table, Tensor src_page_table, Tensor "
|
||||
"cu_seqlens_q) -> ()");
|
||||
m.impl("fast_topk_transform_fused", torch::kCUDA, &fast_topk_transform_interface);
|
||||
m.def(
|
||||
"fast_topk_transform_ragged_fused(Tensor score, Tensor lengths, Tensor topk_indices_ragged, Tensor "
|
||||
"topk_indices_offset) -> ()");
|
||||
m.impl("fast_topk_transform_ragged_fused", torch::kCUDA, &fast_topk_transform_ragged_interface);
|
||||
|
||||
/*
|
||||
* From gguf quantization
|
||||
|
||||
Reference in New Issue
Block a user