[4/N] MoE Refactor: Unified Triton Kernel for FusedMoE and EPMoE (#8515)

2025-07-31 02:34:02 -07:00
parent e7dc163f57
commit 32fa1e9cc2
6 changed files with 70 additions and 690 deletions
--- a/python/sglang/srt/layers/quantization/w4afp8.py
+++ b/python/sglang/srt/layers/quantization/w4afp8.py
@@ -276,6 +276,7 @@ class W4AFp8MoEMethod(FusedMoEMethodBase):
        layer: EPMoE,
        hidden_states: torch.Tensor,
        topk_output: TopKOutput,
+        **kwargs,
    ) -> torch.Tensor:

        # TODO(ch-wan): move it out of this class