Adapt to dsv32 on DCU

2025-09-30 18:37:31 +08:00
parent 8f7453e3af
commit 852a49c5cc
159 changed files with 7211 additions and 7782 deletions
--- a/python/sglang/srt/layers/quantization/mxfp4.py
+++ b/python/sglang/srt/layers/quantization/mxfp4.py
@@ -843,18 +843,10 @@ class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
            topk_weights = topk_weights.to(
                torch.float32
            )  # aiter's moe_sorting requires topk_weights to be FP32
-
-        if hasattr(torch, "float4_e2m1fn_x2"):
-            w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
-            w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
-        else:
-            w13_weight = layer.w13_weight
-            w2_weight = layer.w2_weight
-
        output = fused_moe(
            x,
-            w13_weight,
-            w2_weight,
+            layer.w13_weight,
+            layer.w2_weight,
            topk_weights,
            topk_ids,
            quant_type=QuantType.per_1x32,