From 8ada1ab6c791c82cf6b476a24818221121b3d799 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Sun, 3 Aug 2025 09:49:47 +0800 Subject: [PATCH] Fix triton moe error caused by TopK refactor (#8705) --- .../fused_moe_triton/triton_kernels_moe.py | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py index d0f90f2d8..eed33c5e8 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py @@ -146,34 +146,3 @@ def triton_kernel_fused_experts( ) return intermediate_cache3 - - -def triton_kernel_moe_forward_fake( - hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - gating_output: torch.Tensor, - topk: int, - renormalize: bool, - inplace: bool = False, - activation: str = "silu", - apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - per_channel_quant: bool = False, - global_num_experts: int = -1, - expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, -) -> torch.Tensor: - return torch.empty_like(hidden_states) - - -direct_register_custom_op( - op_name="forward_cuda_triton", - op_func=triton_kernel_moe_forward, - mutates_args=[], - fake_impl=triton_kernel_moe_forward_fake, -)