diff --git a/python/sglang/srt/managers/expert_distribution.py b/python/sglang/srt/managers/expert_distribution.py index 59206117f..9e994a734 100644 --- a/python/sglang/srt/managers/expert_distribution.py +++ b/python/sglang/srt/managers/expert_distribution.py @@ -703,6 +703,7 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin): dtype=torch.int32, device=self._server_args.device, ) + self._first_dump = True def append( self, @@ -727,9 +728,15 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin): num_logical_experts=self._expert_location_metadata.num_logical_experts, physical_to_logical_map=self._expert_location_metadata.physical_to_logical_map, ) + + if self._first_dump: + self._first_dump = False + torch.cuda.empty_cache() + torch.distributed.all_reduce( logical_count_of_buffered_step, op=torch.distributed.ReduceOp.SUM ) + output = dict( rank=self._rank, logical_count=logical_count_of_buffered_step,