adapt to sglang v0.5.2rc1 on dcu

2025-09-04 15:56:33 +08:00
commit 909abb58f5
2320 changed files with 489411 additions and 0 deletions
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/.ninja_deps
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/.ninja_deps
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/.ninja_log
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/.ninja_log
@@ -0,0 +1,10 @@
+# ninja log v5
+4	12793	1756950714004023222	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/speculative/eagle_utils.o	c4ef5c8f5ca38169
+4	22797	1756950724004023564	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/grammar/apply_token_bitmask_inplace_cuda.o	983ca8e755dab2fa
+3	24387	1756950725592023618	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/custom_all_reduce.o	95a36ff854253806
+4	29769	1756950730660023792	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/common_extension_rocm.o	d99ffa7422128b8b
+3	47747	1756950748952024417	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/quick_all_reduce.o	ffc38859847f9f7
+32	22789	1756951323296044067	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/kvcacheio/transfer.o	51f92c934bf3b3a4
+32	22927	1756951323432044072	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_align_kernel.o	3cbca550065cd70f
+32	23521	1756951324028044092	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/elementwise/activation.o	c8aa216837c116a0
+33	26809	1756951327312044205	/home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_topk_softmax_kernels.o	a28bfee01bc84adc
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/build.ninja
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/build.ninja
@@ -0,0 +1,38 @@
+ninja_required_version = 1.3
+cxx = c++
+nvcc = /opt/dtk/bin/hipcc
+
+cflags = -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -I/home/git_sglang/sglang/sgl-kernel/include -I/home/git_sglang/sglang/sgl-kernel/csrc -I/usr/local/lib/python3.10/dist-packages/torch/include -I/usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.10/dist-packages/torch/include/TH -I/usr/local/lib/python3.10/dist-packages/torch/include/THC -I/usr/local/lib/python3.10/dist-packages/torch/include/THH -I/opt/dtk/include -I/usr/include/python3.10 -c
+post_cflags = -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -O3 -Wno-switch-bool -Wno-macro-redefined -Wno-deprecated-declarations -w -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1014"' -DTORCH_EXTENSION_NAME=common_ops -D_GLIBCXX_USE_CXX11_ABI=1 -std=c++17
+cuda_cflags = -I/home/git_sglang/sglang/sgl-kernel/include -I/home/git_sglang/sglang/sgl-kernel/csrc -I/usr/local/lib/python3.10/dist-packages/torch/include -I/usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -I/usr/local/lib/python3.10/dist-packages/torch/include/TH -I/usr/local/lib/python3.10/dist-packages/torch/include/THC -I/usr/local/lib/python3.10/dist-packages/torch/include/THH -I/opt/dtk/include -I/usr/include/python3.10 -c
+cuda_post_cflags = -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -D__HIP_NO_HALF_OPERATORS__=1 -D__HIP_NO_HALF_CONVERSIONS__=1 -fPIC -O3 -std=c++17 -D__HIP_PLATFORM_HCC__=1 --offload-arch=gfx928 --offload-arch=gfx936 --gpu-max-threads-per-block=1024 -Wno-macro-redefined '' -funroll-loops -Rpass-analysis=unroll-loops -w -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1014"' -DTORCH_EXTENSION_NAME=common_ops -D_GLIBCXX_USE_CXX11_ABI=1 -fno-gpu-rdc
+cuda_dlink_post_cflags = 
+ldflags = 
+
+rule compile
+  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
+  depfile = $out.d
+  deps = gcc
+
+rule cuda_compile
+  command = $nvcc  $cuda_cflags -c $in -o $out $cuda_post_cflags
+
+
+
+
+
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/custom_all_reduce.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/allreduce/custom_all_reduce.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/quick_all_reduce.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/allreduce/quick_all_reduce.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/common_extension_rocm.o: compile /home/git_sglang/sglang/sgl-kernel/csrc/common_extension_rocm.cc
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/elementwise/activation.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/elementwise/activation.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/grammar/apply_token_bitmask_inplace_cuda.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/kvcacheio/transfer.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/kvcacheio/transfer.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_align_kernel.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/moe/moe_align_kernel.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_topk_softmax_kernels.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/moe/moe_topk_softmax_kernels.hip
+build /home/git_sglang/sglang/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/speculative/eagle_utils.o: cuda_compile /home/git_sglang/sglang/sgl-kernel/csrc/speculative/eagle_utils.hip
+
+
+
+
+
+
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/custom_all_reduce.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/custom_all_reduce.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/quick_all_reduce.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/allreduce/quick_all_reduce.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/common_extension_rocm.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/common_extension_rocm.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/elementwise/activation.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/elementwise/activation.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/grammar/apply_token_bitmask_inplace_cuda.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/grammar/apply_token_bitmask_inplace_cuda.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/kvcacheio/transfer.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/kvcacheio/transfer.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_align_kernel.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_align_kernel.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_topk_softmax_kernels.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/moe/moe_topk_softmax_kernels.o
--- a/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/speculative/eagle_utils.o
+++ b/sgl-kernel/build/temp.linux-x86_64-cpython-310/csrc/speculative/eagle_utils.o