[Fix] RuntimeError: get_cfg Unsupported input_type:Float4_e2m1fn_x2 when using aiter-mxfp4-moe (#10981)

Co-authored-by: wunhuang <wunhuang@amd.com>
This commit is contained in:
kk
2025-09-27 13:12:22 +08:00
committed by GitHub
parent 8260574729
commit 8ebf72fef3
2 changed files with 19 additions and 4 deletions

View File

@@ -843,10 +843,18 @@ class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
topk_weights = topk_weights.to(
torch.float32
) # aiter's moe_sorting requires topk_weights to be FP32
if hasattr(torch, "float4_e2m1fn_x2"):
w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
else:
w13_weight = layer.w13_weight
w2_weight = layer.w2_weight
output = fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
w13_weight,
w2_weight,
topk_weights,
topk_ids,
quant_type=QuantType.per_1x32,

View File

@@ -183,10 +183,17 @@ class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod):
moe_runner_config = self.moe_runner_config
topk_weights, topk_ids, _ = topk_output
if hasattr(torch, "float4_e2m1fn_x2"):
w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
else:
w13_weight = layer.w13_weight
w2_weight = layer.w2_weight
output = fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
w13_weight,
w2_weight,
topk_weights,
topk_ids,
quant_type=QuantType.per_1x32,