Forbid DeepEP race condition when there are too many tokens (#9567)
This commit is contained in:
@@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase:
|
|||||||
self.num_max_dispatch_tokens_per_rank = get_int_env_var(
|
self.num_max_dispatch_tokens_per_rank = get_int_env_var(
|
||||||
"SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
|
"SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
|
||||||
)
|
)
|
||||||
|
# DeepEP internode_ll dispatch uses FINISHED_SUM_TAG=1024
|
||||||
|
# and the logic requires the number of tokens sent from one rank to another rank to be less than it
|
||||||
|
assert self.num_max_dispatch_tokens_per_rank <= 1024
|
||||||
|
|
||||||
self.handle = None
|
self.handle = None
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user