From 476c67d7fcfea316f23d24afe90a8f679f0490a4 Mon Sep 17 00:00:00 2001
From: Shangming Cai
Date: Thu, 16 Oct 2025 10:13:08 +0800
Subject: [PATCH] Fix missing a2a backend init of GLM4.5 MoE Block (#11692)

Signed-off-by: Shangming Cai
---
 python/sglang/srt/models/glm4_moe.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py
index 5080bf88f..35ce0c40d 100644
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -467,7 +467,7 @@ class Glm4MoeSparseMoeBlock(DeepseekV2MoE):
 
         self.top_k = config.num_experts_per_tok
 
-        if get_moe_a2a_backend().is_deepep():
+        if get_moe_a2a_backend().is_deepep() or get_moe_a2a_backend().is_mooncake():
             # TODO: we will support tp < ep in the future
             self.ep_size = get_moe_expert_parallel_world_size()
             self.num_experts = (
@@ -496,7 +496,9 @@ class Glm4MoeSparseMoeBlock(DeepseekV2MoE):
                 return_recv_hook=True,
             )
 
-        self._enable_deepep_moe = get_moe_a2a_backend().is_deepep()
+        self._enable_a2a_moe = (
+            get_moe_a2a_backend().is_deepep() or get_moe_a2a_backend().is_mooncake()
+        )
 
     def forward_normal_dual_stream(
         self,