From bd4f581896692b27d977e14e4c2cac39c4073845 Mon Sep 17 00:00:00 2001 From: kk <43161300+kkHuang-amd@users.noreply.github.com> Date: Mon, 23 Jun 2025 06:33:09 +0800 Subject: [PATCH] Fix torch compile run (#7391) Co-authored-by: wunhuang Co-authored-by: Sai Enduri --- docker/Dockerfile.rocm | 2 +- .../srt/layers/moe/fused_moe_triton/layer.py | 3 ++- python/sglang/srt/layers/quantization/fp8.py | 16 ++++++++-------- scripts/amd_ci_start_container.sh | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 3c9870125..f33de182d 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -18,7 +18,7 @@ ARG TRITON_COMMIT="improve_fa_decode_3.0.0" ARG AITER_REPO="https://github.com/ROCm/aiter.git" -ARG AITER_COMMIT="v0.1.2" +ARG AITER_COMMIT="v0.1.3" RUN git clone ${SGL_REPO} \ && cd sglang \ diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py index 6a82db210..5479c7aca 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py @@ -32,6 +32,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip if _use_aiter: from aiter import ActivationType + from aiter.fused_moe import fused_moe from aiter.fused_moe_bf16_asm import ck_moe_2stages from aiter.ops.shuffle import shuffle_weight @@ -204,7 +205,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): topk_weights, dtype=torch.float32 ) # topk_weights must be FP32 (float32) - return ck_moe_2stages( + return fused_moe( x, layer.w13_weight, layer.w2_weight, diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index 36807aeda..bc325aa2c 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -1052,15 +1052,15 @@ class Fp8MoEMethod: if _use_hip_int4: # TODO: add triton kernel and add check _use_aiter assert not no_combine, f"{no_combine=} is not supported." - return ck_moe_2stages( + return fused_moe( x, layer.w13_weight, layer.w2_weight, topk_weights, topk_ids, - QuantType.per_Token, - layer.w13_weight_scale1, - layer.w2_weight_scale1, + quant_type=QuantType.per_Token, + w1_scale=layer.w13_weight_scale1, + w2_scale=layer.w2_weight_scale1, activation=( ActivationType.Silu if activation == "silu" else ActivationType.Gelu ), @@ -1086,15 +1086,15 @@ class Fp8MoEMethod: expert_mask=None, ) else: - return ck_moe_2stages( + return fused_moe( x, layer.w13_weight, layer.w2_weight, topk_weights, topk_ids, - QuantType.per_Token, - layer.w13_weight_scale1, - layer.w2_weight_scale1, + quant_type=QuantType.per_Token, + w1_scale=layer.w13_weight_scale1, + w2_scale=layer.w2_weight_scale1, activation=( ActivationType.Silu if activation == "silu" diff --git a/scripts/amd_ci_start_container.sh b/scripts/amd_ci_start_container.sh index bf3d52890..f7f20d57e 100755 --- a/scripts/amd_ci_start_container.sh +++ b/scripts/amd_ci_start_container.sh @@ -9,7 +9,7 @@ else fi # Pull the image -IMAGE="lmsysorg/sglang:v0.4.6.post5-rocm630" +IMAGE="ghcr.io/saienduri/sglang:aiter-1.3" echo "Pulling Docker image: $IMAGE" docker pull "$IMAGE"