Forbid DeepEP race condition when too many tokens (#9567)

This commit is contained in:
fzyzcjy
2025-09-05 19:47:05 +08:00
committed by GitHub
parent 13705dae06
commit adf73175d6

View File

@@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase:
self.num_max_dispatch_tokens_per_rank = get_int_env_var(
"SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
)
# DeepEP internode_ll dispatch uses FINISHED_SUM_TAG=1024,
# and its logic requires the number of tokens sent from one rank to another rank to be less than that value
assert self.num_max_dispatch_tokens_per_rank <= 1024
self.handle = None