Add a CUDA kernel for fusing mapping and weighted sum for MoE. (#6916)
Co-authored-by: Elfie Guo <elfiegxf@gmail.com>
This commit is contained in:
@@ -48,6 +48,7 @@ from sgl_kernel.gemm import (
|
||||
)
|
||||
from sgl_kernel.grammar import apply_token_bitmask_inplace_cuda
|
||||
from sgl_kernel.moe import (
|
||||
apply_shuffle_mul_sum,
|
||||
cutlass_fp4_group_mm,
|
||||
ep_moe_post_reorder,
|
||||
ep_moe_pre_reorder,
|
||||
|
||||
Reference in New Issue
Block a user