From 191d836ff616c3097c7462bfade1e707cac6e324 Mon Sep 17 00:00:00 2001
From: Peng Zhang
Date: Sat, 12 Jul 2025 05:20:58 +0800
Subject: [PATCH] fix: minor fix for modelopt weight load compatibility (#7953)

---
 python/sglang/srt/layers/moe/fused_moe_triton/layer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
index 6122e0ded..ad495d595 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -518,6 +518,7 @@ class FusedMoE(torch.nn.Module):
             self.quant_method.enable_flashinfer_moe = self.enable_flashinfer_moe
 
         assert self.quant_method is not None
+        self.quant_config = quant_config
         self.quant_method.create_weights(
             layer=self,
             num_experts=self.local_num_experts,
@@ -661,7 +662,11 @@ class FusedMoE(torch.nn.Module):
         ):
             raise ValueError("expert_data and loaded_weight must be torch.Tensor")
 
-        if expert_data.dim() != 2 or loaded_weight.dim() != 2:
+        if (
+            self.quant_config is not None
+            and "modelopt" in self.quant_config.get_name()
+            and (expert_data.dim() != 2 or loaded_weight.dim() != 2)
+        ):
             raise ValueError(
                 f"Expected 2D tensors, got expert_data shape {expert_data.shape} and loaded_weight shape {loaded_weight.shape}"
             )