diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py index 5080bf88f..35ce0c40d 100644 --- a/python/sglang/srt/models/glm4_moe.py +++ b/python/sglang/srt/models/glm4_moe.py @@ -467,7 +467,7 @@ class Glm4MoeSparseMoeBlock(DeepseekV2MoE): self.top_k = config.num_experts_per_tok - if get_moe_a2a_backend().is_deepep(): + if get_moe_a2a_backend().is_deepep() or get_moe_a2a_backend().is_mooncake(): # TODO: we will support tp < ep in the future self.ep_size = get_moe_expert_parallel_world_size() self.num_experts = ( @@ -496,7 +496,9 @@ class Glm4MoeSparseMoeBlock(DeepseekV2MoE): return_recv_hook=True, ) - self._enable_deepep_moe = get_moe_a2a_backend().is_deepep() + self._enable_a2a_moe = ( + get_moe_a2a_backend().is_deepep() or get_moe_a2a_backend().is_mooncake() + ) def forward_normal_dual_stream( self,