Fix torch compile run (#7391)

Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
This commit is contained in:
kk
2025-06-23 06:33:09 +08:00
committed by GitHub
parent 50f1b6d6b1
commit bd4f581896
4 changed files with 12 additions and 11 deletions

View File

@@ -32,6 +32,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
if _use_aiter:
from aiter import ActivationType
from aiter.fused_moe import fused_moe
from aiter.fused_moe_bf16_asm import ck_moe_2stages
from aiter.ops.shuffle import shuffle_weight
@@ -204,7 +205,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
topk_weights, dtype=torch.float32
) # topk_weights must be FP32 (float32)
return ck_moe_2stages(
return fused_moe(
x,
layer.w13_weight,
layer.w2_weight,

View File

@@ -1052,15 +1052,15 @@ class Fp8MoEMethod:
if _use_hip_int4:
# TODO: add triton kernel and add check _use_aiter
assert not no_combine, f"{no_combine=} is not supported."
return ck_moe_2stages(
return fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
topk_weights,
topk_ids,
QuantType.per_Token,
layer.w13_weight_scale1,
layer.w2_weight_scale1,
quant_type=QuantType.per_Token,
w1_scale=layer.w13_weight_scale1,
w2_scale=layer.w2_weight_scale1,
activation=(
ActivationType.Silu if activation == "silu" else ActivationType.Gelu
),
@@ -1086,15 +1086,15 @@ class Fp8MoEMethod:
expert_mask=None,
)
else:
return ck_moe_2stages(
return fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
topk_weights,
topk_ids,
QuantType.per_Token,
layer.w13_weight_scale1,
layer.w2_weight_scale1,
quant_type=QuantType.per_Token,
w1_scale=layer.w13_weight_scale1,
w2_scale=layer.w2_weight_scale1,
activation=(
ActivationType.Silu
if activation == "silu"