Adapt to SGLang v0.5.2rc1 on DCU
This commit is contained in:
116
sgl-kernel/setup_rocm.py
Normal file
116
sgl-kernel/setup_rocm.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# Copyright 2025 SGLang Team. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from setuptools import find_packages, setup
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||
|
||||
# Repository root: the directory containing this setup script.
root = Path(__file__).parent.resolve()
# Host CPU architecture (e.g. "x86_64"), used below to locate the
# distro's multiarch library directory.
arch = platform.machine().lower()
|
||||
|
||||
|
||||
def _get_version():
    """Return the package version string parsed from pyproject.toml.

    Scans ``<root>/pyproject.toml`` line by line for the first line that
    starts with ``version`` and returns the value after ``=`` with
    surrounding whitespace and double quotes stripped.

    Raises:
        RuntimeError: if no ``version`` line is found.  (The original
            implementation fell off the loop and returned ``None``, which
            only surfaced later as an obscure failure inside ``setup()``.)
    """
    pyproject = root / "pyproject.toml"
    with open(pyproject) as f:
        for line in f:
            if line.startswith("version"):
                return line.split("=")[1].strip().strip('"')
    raise RuntimeError(f"Could not find a 'version' entry in {pyproject}")
|
||||
|
||||
|
||||
# Namespace used when registering this extension's custom torch operators.
operator_namespace = "sgl_kernel"

# Header search paths, all resolved against the repository root.
include_dirs = [root / sub for sub in ("include", "include/impl", "csrc")]

# Translation units compiled into the common_ops extension.
sources = [
    "csrc/allreduce/custom_all_reduce.hip",
    "csrc/allreduce/quick_all_reduce.cu",
    "csrc/common_extension_rocm.cc",
    "csrc/elementwise/activation.cu",
    "csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
    "csrc/moe/moe_align_kernel.cu",
    "csrc/moe/moe_topk_softmax_kernels.cu",
    "csrc/speculative/eagle_utils.cu",
    "csrc/kvcacheio/transfer.cu",
]

# Host-compiler flags.
cxx_flags = ["-O3"]

# ROCm runtime libraries plus the torch libraries the extension links against.
libraries = ["hiprtc", "amdhip64", "c10", "torch", "torch_python"]

# Let the installed wheel locate torch's shared libraries at runtime, and
# add the distro's multiarch library directory to the link search path.
extra_link_args = [
    "-Wl,-rpath,$ORIGIN/../../torch/lib",
    "-L/usr/lib/" + arch + "-linux-gnu",
]
|
||||
|
||||
# Target GPU architecture: default gfx942, overridable via AMDGPU_TARGET,
# and refined by probing device 0 when torch reports a usable device.
default_target = "gfx942"
amdgpu_target = os.environ.get("AMDGPU_TARGET", default_target)

if torch.cuda.is_available():
    try:
        # gcnArchName looks like "gfx942:sramecc+:xnack-"; keep only the
        # architecture token before the first ":".
        amdgpu_target = torch.cuda.get_device_properties(0).gcnArchName.split(":")[0]
    except Exception as e:
        # Best-effort probe: fall back to the env/default target.
        print(f"Warning: Failed to detect GPU properties: {e}")
else:
    print(f"Warning: torch.cuda not available. Using default target: {amdgpu_target}")

if amdgpu_target not in ["gfx942", "gfx950"]:
    # Fatal, not a warning: the kernel sources only build for these two
    # architectures.  The original code printed "Warning:" to stdout and
    # then exited non-zero; make the diagnostic an error on stderr.
    print(
        f"Error: Unsupported GPU architecture detected '{amdgpu_target}'. Expected 'gfx942' or 'gfx950'.",
        file=sys.stderr,
    )
    sys.exit(1)

# fp8 encoding macro chosen per target: FNUZ for gfx942, E4M3 otherwise
# (i.e. gfx950, the only other accepted target).
fp8_macro = (
    "-DHIP_FP8_TYPE_FNUZ" if amdgpu_target == "gfx942" else "-DHIP_FP8_TYPE_E4M3"
)
|
||||
|
||||
# Device-compiler flags.  These are passed via the "nvcc" key of
# extra_compile_args below; on ROCm builds torch's cpp_extension routes
# that entry to hipcc (NOTE(review): per torch.utils.cpp_extension
# behavior — confirm against actual build logs on DCU).
hipcc_flags = [
    "-DNDEBUG",
    f"-DOPERATOR_NAMESPACE={operator_namespace}",
    "-O3",
    "-Xcompiler",  # forwards the next flag to the host compiler
    "-fPIC",
    "-std=c++17",
    f"--amdgpu-target={amdgpu_target}",  # build only for the detected/requested arch
    "-DENABLE_BF16",
    "-DENABLE_FP8",
    fp8_macro,  # fp8 flavor selected above based on the target arch
]
|
||||
|
||||
# Single extension module bundling all kernels.  CUDAExtension is used
# even on ROCm (NOTE(review): torch's cpp_extension hipifies CUDA
# extensions for ROCm builds — confirm this holds for the DCU toolchain).
ext_modules = [
    CUDAExtension(
        name="sgl_kernel.common_ops",
        sources=sources,
        include_dirs=include_dirs,
        extra_compile_args={
            "nvcc": hipcc_flags,  # device-code flags (see hipcc_flags above)
            "cxx": cxx_flags,  # host-code flags
        },
        libraries=libraries,
        extra_link_args=extra_link_args,
        py_limited_api=False,  # full CPython API, not the stable ABI
    ),
]
|
||||
|
||||
# Build and package sgl-kernel: Python sources under python/, plus the
# compiled common_ops extension, built with ninja for parallelism.
setup(
    name="sgl-kernel",
    version=_get_version(),
    packages=find_packages(where="python"),
    package_dir={"": "python"},  # Python package sources live under python/
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension.with_options(use_ninja=True)},
    # NOTE(review): the wheel is tagged for the cp39 limited API here while
    # the extension above sets py_limited_api=False — confirm this tag is
    # intentional, otherwise the wheel advertises an ABI it may not honor.
    options={"bdist_wheel": {"py_limited_api": "cp39"}},
)
|
||||
Reference in New Issue
Block a user