fix moe_align_block_size_kernel for shared memory issue (#2579)

Co-authored-by: ispobock <ispobaoke@163.com>
This commit is contained in:
Yineng Zhang
2024-12-26 05:31:04 +08:00
committed by GitHub
parent 53aed988cb
commit 31548116a8
6 changed files with 225 additions and 2 deletions

View File

@@ -109,6 +109,26 @@ setup(
libraries=["c10", "torch", "torch_python"],
extra_link_args=["-Wl,-rpath,$ORIGIN/../../torch/lib"],
),
# Standalone CUDA extension for the moe_align_block_size op added by this
# commit (see commit title: shared-memory fix, #2579).
# NOTE(review): this is a rendered diff hunk, not the raw setup.py — the
# original Python indentation was stripped by the diff view.
CUDAExtension(
"sgl_kernel.ops.moe_align_block_size",
[
"src/sgl-kernel/csrc/moe_align_kernel.cu",
],
extra_compile_args={
# nvcc: optimize, build position-independent code, and emit SASS for
# SM 7.5 / 8.0 / 8.9 / 9.0 (Turing, Ampere, Ada, Hopper).
"nvcc": [
"-O3",
"-Xcompiler",
"-fPIC",
"-gencode=arch=compute_75,code=sm_75",
"-gencode=arch=compute_80,code=sm_80",
"-gencode=arch=compute_89,code=sm_89",
"-gencode=arch=compute_90,code=sm_90",
],
"cxx": ["-O3"],
},
# Link against the libtorch runtime libraries; the $ORIGIN rpath resolves
# them relative to the installed extension (../../torch/lib), matching the
# sibling extension entry visible just above in this hunk.
libraries=["c10", "torch", "torch_python"],
extra_link_args=["-Wl,-rpath,$ORIGIN/../../torch/lib"],
),
],
cmdclass={"build_ext": BuildExtension},
install_requires=["torch"],