ROCm: sgl-kernel enablement starting with sgl_moe_align_block (#3287)
This commit is contained in:
@@ -31,7 +31,7 @@ srt = [
|
||||
|
||||
# HIP (Heterogeneous-computing Interface for Portability) for AMD
|
||||
# => base docker rocm/vllm-dev:20241022, not from public vllm whl
|
||||
srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.7.dev2", "outlines==0.1.11"]
|
||||
srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.7.dev2", "outlines==0.1.11", "sgl-kernel>=0.0.3.post1"]
|
||||
# xpu is not enabled in public vllm and torch whl,
|
||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
||||
srt_xpu = ["sglang[runtime_common]", "outlines>=0.0.44,<0.1.0"]
|
||||
|
||||
@@ -15,18 +15,10 @@ from vllm import _custom_ops as ops
|
||||
|
||||
from sglang.srt.layers.moe.topk import select_experts
|
||||
from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
|
||||
from sglang.srt.utils import (
|
||||
direct_register_custom_op,
|
||||
get_device_name,
|
||||
is_cuda_available,
|
||||
is_hip,
|
||||
)
|
||||
from sglang.srt.utils import direct_register_custom_op, get_device_name, is_hip
|
||||
|
||||
is_cuda = is_cuda_available()
|
||||
is_hip_flag = is_hip()
|
||||
if is_cuda:
|
||||
from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
|
||||
|
||||
from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
padding_size = 128 if bool(int(os.getenv("MOE_PADDING", "0"))) else 0
|
||||
@@ -415,7 +407,7 @@ def moe_align_block_size(
|
||||
)
|
||||
num_tokens_post_pad = torch.empty((1), dtype=torch.int32, device=topk_ids.device)
|
||||
if num_experts >= 224:
|
||||
if enable_moe_align_block_size_triton or is_hip_flag:
|
||||
if enable_moe_align_block_size_triton:
|
||||
moe_align_block_size_triton(
|
||||
topk_ids,
|
||||
num_experts,
|
||||
|
||||
Reference in New Issue
Block a user