From 9c53dad80993157713f528584688ffc33fc2f985 Mon Sep 17 00:00:00 2001
From: Jue WANG <zjuwangjue@gmail.com>
Date: Mon, 22 Sep 2025 17:21:57 -0400
Subject: [PATCH] Fix MTP MoE weight loading with NVFP4 target model. (#10758)

---
 python/sglang/srt/layers/moe/fused_moe_triton/layer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
index 81355c4f9..241f8b142 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -575,7 +575,10 @@ class FusedMoE(torch.nn.Module):
             )
 
         # Flashinfer assumes w31 format for w13_weight. Same for the scales.
-        if should_use_flashinfer_trtllm_moe():
+        if (
+            should_use_flashinfer_trtllm_moe()
+            and self.quant_method.__class__.__name__ == "ModelOptNvFp4FusedMoEMethod"
+        ):
             shard_id = {"w1": "w3", "w3": "w1", "w2": "w2"}[shard_id]
 
         WEIGHT_SCALE_SUPPORTED = [e.value for e in FusedMoeWeightScaleSupported]