diff --git a/python/sglang/srt/models/bailing_moe.py b/python/sglang/srt/models/bailing_moe.py index b6063aa2c..23313cb42 100644 --- a/python/sglang/srt/models/bailing_moe.py +++ b/python/sglang/srt/models/bailing_moe.py @@ -54,7 +54,7 @@ from sglang.srt.layers.linear import ( RowParallelLinear, ) from sglang.srt.layers.logits_processor import LogitsProcessor -from sglang.srt.layers.moe import get_moe_a2a_backend +from sglang.srt.layers.moe import get_deepep_mode, get_moe_a2a_backend from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.moe.token_dispatcher import DeepEPDispatcher @@ -293,7 +293,7 @@ class BailingMoESparseMoeBlock(nn.Module): num_local_experts=config.num_experts // self.tp_size, hidden_size=config.hidden_size, params_dtype=config.torch_dtype, - deepep_mode=DeepEPMode[global_server_args_dict["deepep_mode"]], + deepep_mode=get_deepep_mode(), async_finish=True, # TODO return_recv_hook=True, )