From cb8ed2c09a1418f1370f4034064e526f67beeaa4 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:40:42 +0800 Subject: [PATCH] Make DeepEP combine recv do not overlap (#11535) --- python/sglang/srt/layers/moe/token_dispatcher/deepep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py index 5e980f472..abd0b9e82 100644 --- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py +++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py @@ -616,6 +616,9 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase): return hidden_states, event, hook, overlap_args def combine_b(self, hidden_states, event, hook, overlap_args): + if overlap_args is not None: + overlap_args.stream.wait_stream(self.device_module.current_stream()) + hook() if self.return_recv_hook else event.current_stream_wait() if overlap_args is not None: