adapt to sglang v0.5.2rc1 on dcu

This commit is contained in:
maxiao
2025-09-04 15:56:33 +08:00
commit 909abb58f5
2320 changed files with 489411 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
# ninja log v5
4 12793 1756950714004023222 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/speculative/eagle_utils.o c4ef5c8f5ca38169
4 22797 1756950724004023564 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/grammar/apply_token_bitmask_inplace_cuda.o 983ca8e755dab2fa
3 24387 1756950725592023618 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/custom_all_reduce.o 95a36ff854253806
4 29769 1756950730660023792 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/common_extension_rocm.o d99ffa7422128b8b
3 47747 1756950748952024417 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/quick_all_reduce.o ffc38859847f9f7
32 22789 1756951323296044067 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/kvcacheio/transfer.o 51f92c934bf3b3a4
32 22927 1756951323432044072 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_align_kernel.o 3cbca550065cd70f
32 23521 1756951324028044092 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/elementwise/activation.o c8aa216837c116a0
33 26809 1756951327312044205 /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_topk_softmax_kernels.o a28bfee01bc84adc

View File

@@ -0,0 +1,38 @@
# Minimum ninja version required to parse this manifest.
ninja_required_version = 1.3
# Host C++ compiler; used by the "compile" rule for plain .cc sources.
cxx = c++
# Despite its name, "nvcc" is bound to the hipcc driver under /opt/dtk;
# the "cuda_compile" rule invokes it for the .hip sources.
nvcc = /opt/dtk/bin/hipcc
# Host-side flags placed before the input in the "compile" rule:
# warning/optimisation options, -fPIC and the include search paths
# (project, torch, /opt/dtk, Python 3.10 headers).
cflags = -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -I/home/git_sglang/sglang/sgl-kernel/include -I/home/git_sglang/sglang/sgl-kernel/csrc -I/usr/local/lib/python3.10/dist-packages/torch/include -I/usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.10/dist-packages/torch/include/TH -I/usr/local/lib/python3.10/dist-packages/torch/include/THC -I/usr/local/lib/python3.10/dist-packages/torch/include/THH -I/opt/dtk/include -I/usr/include/python3.10 -c
# Host-side flags placed after the output in the "compile" rule:
# HIP/ROCm platform defines, -O3, pybind11 ABI tags, the extension name
# (common_ops) and -std=c++17.
post_cflags = -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -O3 -Wno-switch-bool -Wno-macro-redefined -Wno-deprecated-declarations -w -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1014"' -DTORCH_EXTENSION_NAME=common_ops -D_GLIBCXX_USE_CXX11_ABI=1 -std=c++17
# Include search paths placed before the input in the "cuda_compile" rule
# (same directories as in cflags).
cuda_cflags = -I/home/git_sglang/sglang/sgl-kernel/include -I/home/git_sglang/sglang/sgl-kernel/csrc -I/usr/local/lib/python3.10/dist-packages/torch/include -I/usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.10/dist-packages/torch/include/TH -I/usr/local/lib/python3.10/dist-packages/torch/include/THC -I/usr/local/lib/python3.10/dist-packages/torch/include/THH -I/opt/dtk/include -I/usr/include/python3.10 -c
# Device-side flags placed after the output in the "cuda_compile" rule:
# HIP defines, -O3, -std=c++17, two --offload-arch targets (gfx928, gfx936),
# a 1024 threads-per-block cap, loop-unroll options and the pybind11/extension
# defines.
# NOTE(review): the bare '' token expands to an empty command-line argument --
# presumably an empty entry in the generating script's flag list; confirm it
# is intentional.
cuda_post_cflags = -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -D__HIP_NO_HALF_OPERATORS__=1 -D__HIP_NO_HALF_CONVERSIONS__=1 -fPIC -O3 -std=c++17 -D__HIP_PLATFORM_HCC__=1 --offload-arch=gfx928 --offload-arch=gfx936 --gpu-max-threads-per-block=1024 -Wno-macro-redefined '' -funroll-loops -Rpass-analysis=unroll-loops -w -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1014"' -DTORCH_EXTENSION_NAME=common_ops -D_GLIBCXX_USE_CXX11_ABI=1 -fno-gpu-rdc
# Left empty; no rule in this manifest references these two variables.
cuda_dlink_post_cflags =
ldflags =
# Host compile rule for plain C++ sources.
# Bindings must be indented under the rule header; at column 0 ninja treats
# them as file-level variables and the rule is left without a command.
rule compile
  # -MMD/-MF make the compiler write header dependencies to $out.d.
  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
  # depfile + deps = gcc let ninja read that Makefile-style dependency file
  # so header changes trigger a rebuild.
  depfile = $out.d
  deps = gcc
# Device compile rule for .hip sources; $nvcc is whatever the "nvcc" variable
# names. No depfile is declared, so ninja tracks only the explicit input for
# objects built with this rule.
rule cuda_compile
  command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags
# One object file per source. The .hip sources are built with the
# "cuda_compile" rule; common_extension_rocm.cc is built with the host
# "compile" rule. Objects are written under
# build/temp.linux-x86_64-cpython-310/, mirroring the csrc/ directory layout.
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/custom_all_reduce.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/allreduce/custom_all_reduce.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/quick_all_reduce.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/allreduce/quick_all_reduce.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/common_extension_rocm.o: compile /home/git_sglang/sglang/sgl-kernel/csrc/common_extension_rocm.cc
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/elementwise/activation.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/elementwise/activation.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/grammar/apply_token_bitmask_inplace_cuda.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/kvcacheio/transfer.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/kvcacheio/transfer.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_align_kernel.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/moe/moe_align_kernel.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_topk_softmax_kernels.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/moe/moe_topk_softmax_kernels.hip
build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/speculative/eagle_utils.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/speculative/eagle_utils.hip