Add endpoints to dump selected expert ids (#4435)

Co-authored-by: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
2025-03-24 21:34:19 -07:00
parent 6b7038babd
commit 199bb01d00
10 changed files with 328 additions and 2 deletions
--- a/python/sglang/srt/layers/moe/topk.py
+++ b/python/sglang/srt/layers/moe/topk.py
@@ -21,6 +21,10 @@ from sglang.srt.utils import get_compiler_backend, is_cuda

 _is_cuda = is_cuda()

+from sglang.srt.managers.utils import ExpertDistributionRecorder
+
+expert_distribution_recorder = ExpertDistributionRecorder()
+

 def fused_topk_native(
    hidden_states: torch.Tensor,
@@ -223,4 +227,6 @@ def select_experts(
            renormalize=renormalize,
        )

+    expert_distribution_recorder.record_new_token(topk_ids)
+
    return topk_weights, topk_ids