[Fix] RuntimeError: get_cfg Unsupported input_type:Float4_e2m1fn_x2 when using aiter-mxfp4-moe (#10981)
Co-authored-by: wunhuang <wunhuang@amd.com>
This commit is contained in:
@@ -843,10 +843,18 @@ class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
|
|||||||
topk_weights = topk_weights.to(
|
topk_weights = topk_weights.to(
|
||||||
torch.float32
|
torch.float32
|
||||||
) # aiter's moe_sorting requires topk_weights to be FP32
|
) # aiter's moe_sorting requires topk_weights to be FP32
|
||||||
|
|
||||||
|
if hasattr(torch, "float4_e2m1fn_x2"):
|
||||||
|
w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
|
||||||
|
w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
|
||||||
|
else:
|
||||||
|
w13_weight = layer.w13_weight
|
||||||
|
w2_weight = layer.w2_weight
|
||||||
|
|
||||||
output = fused_moe(
|
output = fused_moe(
|
||||||
x,
|
x,
|
||||||
layer.w13_weight,
|
w13_weight,
|
||||||
layer.w2_weight,
|
w2_weight,
|
||||||
topk_weights,
|
topk_weights,
|
||||||
topk_ids,
|
topk_ids,
|
||||||
quant_type=QuantType.per_1x32,
|
quant_type=QuantType.per_1x32,
|
||||||
|
|||||||
@@ -183,10 +183,17 @@ class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod):
|
|||||||
moe_runner_config = self.moe_runner_config
|
moe_runner_config = self.moe_runner_config
|
||||||
topk_weights, topk_ids, _ = topk_output
|
topk_weights, topk_ids, _ = topk_output
|
||||||
|
|
||||||
|
if hasattr(torch, "float4_e2m1fn_x2"):
|
||||||
|
w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
|
||||||
|
w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
|
||||||
|
else:
|
||||||
|
w13_weight = layer.w13_weight
|
||||||
|
w2_weight = layer.w2_weight
|
||||||
|
|
||||||
output = fused_moe(
|
output = fused_moe(
|
||||||
x,
|
x,
|
||||||
layer.w13_weight,
|
w13_weight,
|
||||||
layer.w2_weight,
|
w2_weight,
|
||||||
topk_weights,
|
topk_weights,
|
||||||
topk_ids,
|
topk_ids,
|
||||||
quant_type=QuantType.per_1x32,
|
quant_type=QuantType.per_1x32,
|
||||||
|
|||||||
Reference in New Issue
Block a user