Add endpoints to dump selected expert ids (#4435)

Co-authored-by: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
This commit is contained in:
yuhsaun-t
2025-03-24 21:34:19 -07:00
committed by GitHub
parent 6b7038babd
commit 199bb01d00
10 changed files with 328 additions and 2 deletions

View File

@@ -21,6 +21,10 @@ from sglang.srt.utils import get_compiler_backend, is_cuda
# Store the result of is_cuda() in a private name so it is evaluated once here
# rather than at each use site.
_is_cuda = is_cuda()
# NOTE(review): this import follows an executable statement instead of sitting
# with the imports above it -- confirm whether the placement is deliberate.
from sglang.srt.managers.utils import ExpertDistributionRecorder
# Recorder instance created alongside the routing helpers; select_experts hands
# its topk_ids to this object's record_new_token() just before returning.
expert_distribution_recorder = ExpertDistributionRecorder()
def fused_topk_native(
hidden_states: torch.Tensor,
@@ -223,4 +227,6 @@ def select_experts(
renormalize=renormalize,
)
expert_distribution_recorder.record_new_token(topk_ids)
return topk_weights, topk_ids