Forbid DeepEP race condition when there are too many tokens (#9567)

2025-09-05 19:47:05 +08:00
parent 13705dae06
commit adf73175d6
1 changed file with 3 additions and 0 deletions
--- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
@@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase:
        self.num_max_dispatch_tokens_per_rank = get_int_env_var(
            "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
        )
+        # DeepEP's internode_ll dispatch uses FINISHED_SUM_TAG=1024, and its logic requires
+        # the number of tokens sent from one rank to another rank to be less than that value.
+        assert self.num_max_dispatch_tokens_per_rank <= 1024

        self.handle = None