use topk_softmax with sgl-kernel (#4439)

This commit is contained in:
Yineng Zhang
2025-03-14 15:59:06 -07:00
committed by GitHub
parent e73167ade3
commit ad1ae7f7cd
18 changed files with 48 additions and 35 deletions

View File

@@ -20,7 +20,7 @@ jobs:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4

View File

@@ -17,7 +17,7 @@ jobs:
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
run: |

View File

@@ -6,7 +6,7 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
run: |

View File

@@ -25,7 +25,7 @@ jobs:
runs-on: linux-mi300-gpu-1
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Setup docker
run: |
@@ -64,7 +64,7 @@ jobs:
runs-on: linux-mi300-gpu-1
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Setup docker
run: |

View File

@@ -21,7 +21,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
run: |
@@ -45,7 +45,7 @@ jobs:
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install rust dependencies
run: |

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Check clang-format
uses: DoozyX/clang-format-lint-action@v0.18.1

View File

@@ -39,7 +39,7 @@ jobs:
run_tests: ${{ steps.set_run_tests.outputs.run_tests }}
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Filter changes
id: filter
uses: dorny/paths-filter@v2
@@ -72,7 +72,7 @@ jobs:
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -98,7 +98,7 @@ jobs:
part: [0, 1, 2, 3, 4, 5, 6]
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -120,7 +120,7 @@ jobs:
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -172,7 +172,7 @@ jobs:
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -218,7 +218,7 @@ jobs:
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -252,7 +252,7 @@ jobs:
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -294,7 +294,7 @@ jobs:
runs-on: 1-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:
@@ -319,7 +319,7 @@ jobs:
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
env:

View File

@@ -23,7 +23,7 @@ jobs:
build_type: ['all', 'srt']
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: "Set Date"
run: |

View File

@@ -18,7 +18,7 @@ jobs:
build_type: ['all', 'srt']
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Free disk space
uses: jlumbroso/free-disk-space@main

View File

@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Free disk space
uses: jlumbroso/free-disk-space@main

View File

@@ -21,7 +21,7 @@ jobs:
run: rm -rf /opt/hostedtoolcache
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Login to Docker Hub
uses: docker/login-action@v2

View File

@@ -20,7 +20,7 @@ jobs:
if: github.repository == 'sgl-project/sglang'
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4

View File

@@ -17,7 +17,7 @@ jobs:
environment: 'prod'
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Get version
id: get_version

View File

@@ -19,7 +19,7 @@ jobs:
python-version: '3.9'
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Upload to pypi
run: |

View File

@@ -43,7 +43,7 @@ runtime_common = [
srt = [
"sglang[runtime_common]",
"sgl-kernel==0.0.5",
"sgl-kernel==0.0.5.post1",
"flashinfer_python==0.2.3",
"torch==2.5.1",
"vllm>=0.6.4.post1,<=0.7.2",

View File

@@ -17,7 +17,9 @@ from typing import Callable, Optional
import torch
import torch.nn.functional as F
from sglang.srt.utils import get_compiler_backend
from sglang.srt.utils import get_compiler_backend, is_cuda
_is_cuda = is_cuda()
def fused_topk_native(
@@ -47,7 +49,10 @@ def fused_topk(
topk: int,
renormalize: bool,
):
from vllm import _custom_ops as ops
if _is_cuda:
from sgl_kernel import topk_softmax
else:
from vllm import _custom_ops as ops
assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
@@ -61,12 +66,20 @@ def fused_topk(
M, topk, dtype=torch.int32, device=hidden_states.device
)
ops.topk_softmax(
topk_weights,
topk_ids,
token_expert_indicies,
gating_output.float(),
)
if _is_cuda:
topk_softmax(
topk_weights,
topk_ids,
token_expert_indicies,
gating_output.float(),
)
else:
ops.topk_softmax(
topk_weights,
topk_ids,
token_expert_indicies,
gating_output.float(),
)
del token_expert_indicies
if renormalize:

View File

@@ -26,4 +26,4 @@ pip install transformers==4.45.2 sentence_transformers accelerate==1.4.0 peft pa
pip install cuda-python nvidia-cuda-nvrtc-cu12
# reinstall sgl-kernel
pip install sgl-kernel==0.0.5 --force-reinstall --no-deps
pip install sgl-kernel==0.0.5.post1 --force-reinstall --no-deps