[Feature] Integrate DeepEP into SGLang (#4232)
Co-authored-by: Cheng Wan <cwan39@gatech.edu>
Co-authored-by: Xuting Zhou <xutingz@nvidia.com>
This commit is contained in:
@@ -145,6 +145,7 @@ class ModelRunner:
|
||||
"enable_nan_detection": server_args.enable_nan_detection,
|
||||
"enable_dp_attention": server_args.enable_dp_attention,
|
||||
"enable_ep_moe": server_args.enable_ep_moe,
|
||||
"enable_deepep_moe": server_args.enable_deepep_moe,
|
||||
"device": server_args.device,
|
||||
"speculative_accept_threshold_single": server_args.speculative_accept_threshold_single,
|
||||
"speculative_accept_threshold_acc": server_args.speculative_accept_threshold_acc,
|
||||
@@ -277,6 +278,12 @@ class ModelRunner:
|
||||
server_args.chunked_prefill_size = -1
|
||||
server_args.disable_radix_cache = True
|
||||
|
||||
if server_args.enable_deepep_moe:
|
||||
logger.info("DeepEP is turned on.")
|
||||
assert (
|
||||
server_args.enable_dp_attention == True
|
||||
), "Currently DeepEP is bind to Attention DP. Set '--enable-dp-attention --enable-deepep-moe'"
|
||||
|
||||
def init_torch_distributed(self):
|
||||
logger.info("Init torch distributed begin.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user