From adf73175d617dea9e7f216bd56de7e7c9c306bd1 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Fri, 5 Sep 2025 19:47:05 +0800 Subject: [PATCH] Forbid DeepEP racing condition when too many tokens (#9567) --- python/sglang/srt/layers/moe/token_dispatcher/deepep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py index 3e070d814..c6ea49089 100644 --- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py +++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py @@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase: self.num_max_dispatch_tokens_per_rank = get_int_env_var( "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128 ) + # DeepEP internode_ll dispatch uses FINISHED_SUM_TAG=1024 + # and the logic requires num-tokens-sent-from-one-rank-to-another-rank less than it + assert self.num_max_dispatch_tokens_per_rank <= 1024 self.handle = None