diff --git a/.github/workflows/execute-notebook.yml b/.github/workflows/execute-notebook.yml index f1cf30ac3..fdb73c2c4 100644 --- a/.github/workflows/execute-notebook.yml +++ b/.github/workflows/execute-notebook.yml @@ -20,7 +20,7 @@ jobs: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/experiment-runner.yml b/.github/workflows/experiment-runner.yml index 5ccb8ad28..f3382320b 100644 --- a/.github/workflows/experiment-runner.yml +++ b/.github/workflows/experiment-runner.yml @@ -17,7 +17,7 @@ jobs: runs-on: 1-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: | diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2ca571f48..393f7272c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,7 +6,7 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/nightly-test.yml b/.github/workflows/nightly-test.yml index 577339be6..23b8bb44d 100644 --- a/.github/workflows/nightly-test.yml +++ b/.github/workflows/nightly-test.yml @@ -20,7 +20,7 @@ jobs: runs-on: 2-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: | diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index 3e1f4e051..3ce2a4992 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -25,7 +25,7 @@ jobs: runs-on: linux-mi300-gpu-1 steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup docker run: | @@ -64,7 +64,7 @@ jobs: runs-on: linux-mi300-gpu-1 steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup docker run: | diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml index 277ddef77..3b00e72b1 100644 --- a/.github/workflows/pr-test-rust.yml +++ b/.github/workflows/pr-test-rust.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: | @@ -45,7 +45,7 @@ jobs: runs-on: 2-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install rust dependencies run: | diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml index 0c38901f0..5329da8cc 100644 --- a/.github/workflows/pr-test-sgl-kernel.yml +++ b/.github/workflows/pr-test-sgl-kernel.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check clang-format uses: DoozyX/clang-format-lint-action@v0.18.1 diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index f58a3bc64..530c7011c 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -39,7 +39,7 @@ jobs: run_tests: ${{ steps.set_run_tests.outputs.run_tests }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Filter changes id: filter uses: dorny/paths-filter@v2 @@ -72,7 +72,7 @@ jobs: runs-on: 1-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -98,7 +98,7 @@ jobs: part: [0, 1, 2, 3, 4, 5, 6] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -120,7 +120,7 @@ jobs: runs-on: 2-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -172,7 +172,7 @@ jobs: runs-on: 1-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -218,7 +218,7 @@ jobs: runs-on: 1-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -252,7 +252,7 @@ jobs: runs-on: 2-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -294,7 +294,7 @@ jobs: runs-on: 1-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: @@ -319,7 +319,7 @@ jobs: runs-on: 2-gpu-runner steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies env: diff --git a/.github/workflows/release-docker-amd-nightly.yml b/.github/workflows/release-docker-amd-nightly.yml index 489adefae..8923facad 100644 --- a/.github/workflows/release-docker-amd-nightly.yml +++ b/.github/workflows/release-docker-amd-nightly.yml @@ -23,7 +23,7 @@ jobs: build_type: ['all', 'srt'] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: "Set Date" run: | diff --git a/.github/workflows/release-docker-amd.yml b/.github/workflows/release-docker-amd.yml index ffe2843d5..f93ae3178 100644 --- a/.github/workflows/release-docker-amd.yml +++ b/.github/workflows/release-docker-amd.yml @@ -18,7 +18,7 @@ jobs: build_type: ['all', 'srt'] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Free disk space uses: jlumbroso/free-disk-space@main diff --git a/.github/workflows/release-docker-dev.yml b/.github/workflows/release-docker-dev.yml index 1526f802e..9023eaa75 100644 --- a/.github/workflows/release-docker-dev.yml +++ b/.github/workflows/release-docker-dev.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Free disk space uses: jlumbroso/free-disk-space@main diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index d5669886d..e4d257b50 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -21,7 +21,7 @@ jobs: run: rm -rf /opt/hostedtoolcache - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to Docker Hub uses: docker/login-action@v2 diff --git a/.github/workflows/release-docs.yml b/.github/workflows/release-docs.yml index f26ca5c8b..661865ef8 100644 --- a/.github/workflows/release-docs.yml +++ b/.github/workflows/release-docs.yml @@ -20,7 +20,7 @@ jobs: if: github.repository == 'sgl-project/sglang' steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/release-fake-tag.yml b/.github/workflows/release-fake-tag.yml index c4b1c338a..ce5999506 100644 --- a/.github/workflows/release-fake-tag.yml +++ b/.github/workflows/release-fake-tag.yml @@ -17,7 +17,7 @@ jobs: environment: 'prod' steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get version id: get_version diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index cd44696d4..b0f8814ca 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -19,7 +19,7 @@ jobs: python-version: '3.9' - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Upload to pypi run: | diff --git a/python/pyproject.toml b/python/pyproject.toml index 3aa19fc91..ebc8139f2 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -43,7 +43,7 @@ runtime_common = [ srt = [ "sglang[runtime_common]", - "sgl-kernel==0.0.5", + "sgl-kernel==0.0.5.post1", "flashinfer_python==0.2.3", "torch==2.5.1", "vllm>=0.6.4.post1,<=0.7.2", diff --git a/python/sglang/srt/layers/moe/topk.py b/python/sglang/srt/layers/moe/topk.py index e808a0a20..7830ff866 100644 --- a/python/sglang/srt/layers/moe/topk.py +++ b/python/sglang/srt/layers/moe/topk.py @@ -17,7 +17,9 @@ from typing import Callable, Optional import torch import torch.nn.functional as F -from sglang.srt.utils import get_compiler_backend +from sglang.srt.utils import get_compiler_backend, is_cuda + +_is_cuda = is_cuda() def fused_topk_native( @@ -47,7 +49,10 @@ def fused_topk( topk: int, renormalize: bool, ): - from vllm import _custom_ops as ops + if _is_cuda: + from sgl_kernel import topk_softmax + else: + from vllm import _custom_ops as ops assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch" @@ -61,12 +66,20 @@ def fused_topk( M, topk, dtype=torch.int32, device=hidden_states.device ) - ops.topk_softmax( - topk_weights, - topk_ids, - token_expert_indicies, - gating_output.float(), - ) + if _is_cuda: + topk_softmax( + topk_weights, + topk_ids, + token_expert_indicies, + gating_output.float(), + ) + else: + ops.topk_softmax( + topk_weights, + topk_ids, + token_expert_indicies, + gating_output.float(), + ) del token_expert_indicies if renormalize: diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh index 139331578..3bf8050db 100755 --- a/scripts/ci_install_dependency.sh +++ b/scripts/ci_install_dependency.sh @@ -26,4 +26,4 @@ pip install transformers==4.45.2 sentence_transformers accelerate==1.4.0 peft pa pip install cuda-python nvidia-cuda-nvrtc-cu12 # reinstall sgl-kernel -pip install sgl-kernel==0.0.5 --force-reinstall --no-deps +pip install sgl-kernel==0.0.5.post1 --force-reinstall --no-deps