fix: solve cu118 issue for cutlass mla (#5331)
This commit is contained in:
18
.github/workflows/pr-test-sgl-kernel.yml
vendored
18
.github/workflows/pr-test-sgl-kernel.yml
vendored
@@ -35,9 +35,14 @@ jobs:
|
||||
runs-on: sgl-kernel-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.9']
|
||||
cuda-version: ['12.4']
|
||||
|
||||
include:
|
||||
- python-version: '3.9'
|
||||
cuda-version: '11.8'
|
||||
- python-version: '3.9'
|
||||
cuda-version: '12.4'
|
||||
- python-version: '3.9'
|
||||
cuda-version: '12.8'
|
||||
name: Build Wheel (CUDA ${{ matrix.cuda-version }})
|
||||
steps:
|
||||
- name: Cleanup
|
||||
run: |
|
||||
@@ -52,13 +57,14 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
|
||||
- name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
|
||||
run: |
|
||||
cd sgl-kernel
|
||||
chmod +x ./build.sh
|
||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (only for CUDA 12.4)
|
||||
if: ${{ matrix.cuda-version == '12.4' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
||||
@@ -128,7 +134,7 @@ jobs:
|
||||
pip3 uninstall sgl-kernel -y
|
||||
|
||||
finish:
|
||||
needs: [unit-test, mla-test, lint]
|
||||
needs: [unit-test, mla-test, lint, build-wheels]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check all dependent job statuses
|
||||
|
||||
2
.github/workflows/release-whl-kernel.yml
vendored
2
.github/workflows/release-whl-kernel.yml
vendored
@@ -14,7 +14,7 @@ on:
|
||||
jobs:
|
||||
build-wheels:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: sgl-kernel-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.9']
|
||||
|
||||
@@ -25,6 +25,8 @@ limitations under the License.
|
||||
#include <device/sm100_mla.hpp>
|
||||
#include <kernel/sm100_mla_tile_scheduler.hpp>
|
||||
|
||||
#if defined CUDA_VERSION && CUDA_VERSION >= 12040
|
||||
|
||||
#define CUTLASS_CHECK(status) \
|
||||
{ \
|
||||
cutlass::Status error = status; \
|
||||
@@ -205,3 +207,5 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches,
|
||||
|
||||
return MlaSm100Type::Fmha::get_workspace_size(arguments);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user