Fix torch compile run (#7391)
Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
This commit is contained in:
@@ -32,6 +32,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
|
||||
|
||||
if _use_aiter:
|
||||
from aiter import ActivationType
|
||||
from aiter.fused_moe import fused_moe
|
||||
from aiter.fused_moe_bf16_asm import ck_moe_2stages
|
||||
from aiter.ops.shuffle import shuffle_weight
|
||||
|
||||
@@ -204,7 +205,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
||||
topk_weights, dtype=torch.float32
|
||||
) # topk_weights must be FP32 (float32)
|
||||
|
||||
return ck_moe_2stages(
|
||||
return fused_moe(
|
||||
x,
|
||||
layer.w13_weight,
|
||||
layer.w2_weight,
|
||||
|
||||
@@ -1052,15 +1052,15 @@ class Fp8MoEMethod:
|
||||
if _use_hip_int4:
|
||||
# TODO: add triton kernel and add check _use_aiter
|
||||
assert not no_combine, f"{no_combine=} is not supported."
|
||||
return ck_moe_2stages(
|
||||
return fused_moe(
|
||||
x,
|
||||
layer.w13_weight,
|
||||
layer.w2_weight,
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
QuantType.per_Token,
|
||||
layer.w13_weight_scale1,
|
||||
layer.w2_weight_scale1,
|
||||
quant_type=QuantType.per_Token,
|
||||
w1_scale=layer.w13_weight_scale1,
|
||||
w2_scale=layer.w2_weight_scale1,
|
||||
activation=(
|
||||
ActivationType.Silu if activation == "silu" else ActivationType.Gelu
|
||||
),
|
||||
@@ -1086,15 +1086,15 @@ class Fp8MoEMethod:
|
||||
expert_mask=None,
|
||||
)
|
||||
else:
|
||||
return ck_moe_2stages(
|
||||
return fused_moe(
|
||||
x,
|
||||
layer.w13_weight,
|
||||
layer.w2_weight,
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
QuantType.per_Token,
|
||||
layer.w13_weight_scale1,
|
||||
layer.w2_weight_scale1,
|
||||
quant_type=QuantType.per_Token,
|
||||
w1_scale=layer.w13_weight_scale1,
|
||||
w2_scale=layer.w2_weight_scale1,
|
||||
activation=(
|
||||
ActivationType.Silu
|
||||
if activation == "silu"
|
||||
|
||||
Reference in New Issue
Block a user