From 86fe943bc30cc71c2c8784139eda4b7b25f58208 Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Mon, 9 Jun 2025 08:41:14 +0800
Subject: [PATCH] Fix OOM caused by expert distribution dumping (#6967)

---
 python/sglang/srt/managers/expert_distribution.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/sglang/srt/managers/expert_distribution.py b/python/sglang/srt/managers/expert_distribution.py
index 59206117f..9e994a734 100644
--- a/python/sglang/srt/managers/expert_distribution.py
+++ b/python/sglang/srt/managers/expert_distribution.py
@@ -703,6 +703,7 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin):
             dtype=torch.int32,
             device=self._server_args.device,
         )
+        self._first_dump = True
 
     def append(
         self,
@@ -727,9 +728,15 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin):
             num_logical_experts=self._expert_location_metadata.num_logical_experts,
             physical_to_logical_map=self._expert_location_metadata.physical_to_logical_map,
         )
+
+        if self._first_dump:
+            self._first_dump = False
+            torch.cuda.empty_cache()
+
         torch.distributed.all_reduce(
             logical_count_of_buffered_step, op=torch.distributed.ReduceOp.SUM
         )
+
         output = dict(
             rank=self._rank,
             logical_count=logical_count_of_buffered_step,