Add minor support for customizing the DeepEP max dispatch tokens per rank (#6934)
This commit is contained in:
@@ -6,7 +6,7 @@ from sglang.srt.managers.expert_distribution import (
     get_global_expert_distribution_recorder,
 )
 from sglang.srt.managers.schedule_batch import global_server_args_dict
-from sglang.srt.utils import DeepEPMode, load_json_config
+from sglang.srt.utils import DeepEPMode, get_int_env_var, load_json_config
 
 try:
     from deep_ep import Buffer, Config
@@ -188,7 +188,9 @@ class _DeepEPDispatcherImplBase:
         self.deepep_mode = deepep_mode
 
         self.params_bytes = 2
-        self.num_max_dispatch_tokens_per_rank = 128
+        self.num_max_dispatch_tokens_per_rank = get_int_env_var(
+            "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
+        )
 
         self.handle = None
 
Reference in New Issue
Block a user