[sgl-kernel] Support FlashInfer top_k_top_p_sampling_from_logits (#9060)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
Yuan Luo
2025-08-15 01:56:36 +08:00
committed by GitHub
parent 432f2053dd
commit 53dcc750b6
6 changed files with 349 additions and 5 deletions

View File

@@ -85,7 +85,9 @@ from sgl_kernel.moe import (
)
from sgl_kernel.sampling import (
min_p_sampling_from_probs,
top_k_mask_logits,
top_k_renorm_prob,
top_k_top_p_sampling_from_logits,
top_k_top_p_sampling_from_probs,
top_p_renorm_prob,
top_p_sampling_from_probs,