[1/2] Add FP8 Blockscale MoE CUTLASS kernel for Blackwell (#5281)

This commit is contained in:
Elfie Guo
2025-04-22 22:28:20 -07:00
committed by GitHub
parent 71d1785f2d
commit e62c49557d
8 changed files with 732 additions and 1 deletions

7
sgl-kernel/python/sgl_kernel/__init__.py Normal file → Executable file
View File

@@ -41,7 +41,12 @@ from sgl_kernel.gemm import (
     sgl_per_token_group_quant_int8,
     sgl_per_token_quant_fp8,
 )
-from sgl_kernel.moe import moe_align_block_size, moe_fused_gate, topk_softmax
+from sgl_kernel.moe import (
+    fp8_blockwise_scaled_grouped_mm,
+    moe_align_block_size,
+    moe_fused_gate,
+    topk_softmax,
+)
 from sgl_kernel.sampling import (
     min_p_sampling_from_probs,
     top_k_renorm_prob,