Add endpoints to dump selected expert ids (#4435)
Co-authored-by: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
This commit is contained in:
@@ -21,6 +21,10 @@ from sglang.srt.utils import get_compiler_backend, is_cuda
|
||||
|
||||
# Call is_cuda() once when this module is loaded and keep the result in a
# module-level flag.
_is_cuda = is_cuda()
|
||||
|
||||
from sglang.srt.managers.utils import ExpertDistributionRecorder
|
||||
|
||||
# Module-level recorder instance, created once when this module is loaded.
# select_experts() passes its topk_ids to record_new_token() on this object
# just before returning.
expert_distribution_recorder = ExpertDistributionRecorder()
|
||||
|
||||
|
||||
def fused_topk_native(
|
||||
hidden_states: torch.Tensor,
|
||||
@@ -223,4 +227,6 @@ def select_experts(
|
||||
renormalize=renormalize,
|
||||
)
|
||||
|
||||
expert_distribution_recorder.record_new_token(topk_ids)
|
||||
|
||||
return topk_weights, topk_ids
|
||||
|
||||
Reference in New Issue
Block a user