From 86fe943bc30cc71c2c8784139eda4b7b25f58208 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Mon, 9 Jun 2025 08:41:14 +0800 Subject: [PATCH] Fix expert distribution dumping causes OOM (#6967) --- python/sglang/srt/managers/expert_distribution.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/sglang/srt/managers/expert_distribution.py b/python/sglang/srt/managers/expert_distribution.py index 59206117f..9e994a734 100644 --- a/python/sglang/srt/managers/expert_distribution.py +++ b/python/sglang/srt/managers/expert_distribution.py @@ -703,6 +703,7 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin): dtype=torch.int32, device=self._server_args.device, ) + self._first_dump = True def append( self, @@ -727,9 +728,15 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin): num_logical_experts=self._expert_location_metadata.num_logical_experts, physical_to_logical_map=self._expert_location_metadata.physical_to_logical_map, ) + + if self._first_dump: + self._first_dump = False + torch.cuda.empty_cache() + torch.distributed.all_reduce( logical_count_of_buffered_step, op=torch.distributed.ReduceOp.SUM ) + output = dict( rank=self._rank, logical_count=logical_count_of_buffered_step,