use topk_softmax with sgl-kernel (#4439)
This commit is contained in:
2
.github/workflows/execute-notebook.yml
vendored
2
.github/workflows/execute-notebook.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
|
||||
2
.github/workflows/experiment-runner.yml
vendored
2
.github/workflows/experiment-runner.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
runs-on: 1-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
|
||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -6,7 +6,7 @@ jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
|
||||
2
.github/workflows/nightly-test.yml
vendored
2
.github/workflows/nightly-test.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
runs-on: 2-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
|
||||
4
.github/workflows/pr-test-amd.yml
vendored
4
.github/workflows/pr-test-amd.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
runs-on: linux-mi300-gpu-1
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup docker
|
||||
run: |
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
runs-on: linux-mi300-gpu-1
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup docker
|
||||
run: |
|
||||
|
||||
4
.github/workflows/pr-test-rust.yml
vendored
4
.github/workflows/pr-test-rust.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
runs-on: 2-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install rust dependencies
|
||||
run: |
|
||||
|
||||
2
.github/workflows/pr-test-sgl-kernel.yml
vendored
2
.github/workflows/pr-test-sgl-kernel.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Check clang-format
|
||||
uses: DoozyX/clang-format-lint-action@v0.18.1
|
||||
|
||||
18
.github/workflows/pr-test.yml
vendored
18
.github/workflows/pr-test.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
run_tests: ${{ steps.set_run_tests.outputs.run_tests }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Filter changes
|
||||
id: filter
|
||||
uses: dorny/paths-filter@v2
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
runs-on: 1-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -98,7 +98,7 @@ jobs:
|
||||
part: [0, 1, 2, 3, 4, 5, 6]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -120,7 +120,7 @@ jobs:
|
||||
runs-on: 2-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -172,7 +172,7 @@ jobs:
|
||||
runs-on: 1-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -218,7 +218,7 @@ jobs:
|
||||
runs-on: 1-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -252,7 +252,7 @@ jobs:
|
||||
runs-on: 2-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -294,7 +294,7 @@ jobs:
|
||||
runs-on: 1-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
@@ -319,7 +319,7 @@ jobs:
|
||||
runs-on: 2-gpu-runner
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
|
||||
@@ -23,7 +23,7 @@ jobs:
|
||||
build_type: ['all', 'srt']
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: "Set Date"
|
||||
run: |
|
||||
|
||||
2
.github/workflows/release-docker-amd.yml
vendored
2
.github/workflows/release-docker-amd.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
build_type: ['all', 'srt']
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Free disk space
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
|
||||
2
.github/workflows/release-docker-dev.yml
vendored
2
.github/workflows/release-docker-dev.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Free disk space
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
|
||||
2
.github/workflows/release-docker.yml
vendored
2
.github/workflows/release-docker.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
run: rm -rf /opt/hostedtoolcache
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
|
||||
2
.github/workflows/release-docs.yml
vendored
2
.github/workflows/release-docs.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
|
||||
2
.github/workflows/release-fake-tag.yml
vendored
2
.github/workflows/release-fake-tag.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
environment: 'prod'
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get version
|
||||
id: get_version
|
||||
|
||||
2
.github/workflows/release-pypi.yml
vendored
2
.github/workflows/release-pypi.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
python-version: '3.9'
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Upload to pypi
|
||||
run: |
|
||||
|
||||
@@ -43,7 +43,7 @@ runtime_common = [
|
||||
|
||||
srt = [
|
||||
"sglang[runtime_common]",
|
||||
"sgl-kernel==0.0.5",
|
||||
"sgl-kernel==0.0.5.post1",
|
||||
"flashinfer_python==0.2.3",
|
||||
"torch==2.5.1",
|
||||
"vllm>=0.6.4.post1,<=0.7.2",
|
||||
|
||||
@@ -17,7 +17,9 @@ from typing import Callable, Optional
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
from sglang.srt.utils import get_compiler_backend
|
||||
from sglang.srt.utils import get_compiler_backend, is_cuda
|
||||
|
||||
_is_cuda = is_cuda()
|
||||
|
||||
|
||||
def fused_topk_native(
|
||||
@@ -47,7 +49,10 @@ def fused_topk(
|
||||
topk: int,
|
||||
renormalize: bool,
|
||||
):
|
||||
from vllm import _custom_ops as ops
|
||||
if _is_cuda:
|
||||
from sgl_kernel import topk_softmax
|
||||
else:
|
||||
from vllm import _custom_ops as ops
|
||||
|
||||
assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
|
||||
|
||||
@@ -61,12 +66,20 @@ def fused_topk(
|
||||
M, topk, dtype=torch.int32, device=hidden_states.device
|
||||
)
|
||||
|
||||
ops.topk_softmax(
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
token_expert_indicies,
|
||||
gating_output.float(),
|
||||
)
|
||||
if _is_cuda:
|
||||
topk_softmax(
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
token_expert_indicies,
|
||||
gating_output.float(),
|
||||
)
|
||||
else:
|
||||
ops.topk_softmax(
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
token_expert_indicies,
|
||||
gating_output.float(),
|
||||
)
|
||||
del token_expert_indicies
|
||||
|
||||
if renormalize:
|
||||
|
||||
@@ -26,4 +26,4 @@ pip install transformers==4.45.2 sentence_transformers accelerate==1.4.0 peft pa
|
||||
pip install cuda-python nvidia-cuda-nvrtc-cu12
|
||||
|
||||
# reinstall sgl-kernel
|
||||
pip install sgl-kernel==0.0.5 --force-reinstall --no-deps
|
||||
pip install sgl-kernel==0.0.5.post1 --force-reinstall --no-deps
|
||||
|
||||
Reference in New Issue
Block a user