From 592ddf374fe45ae97a85f5b3ff62be36f2b1afc9 Mon Sep 17 00:00:00 2001
From: hlu1 <14827759+hlu1@users.noreply.github.com>
Date: Fri, 26 Sep 2025 17:26:57 -0700
Subject: [PATCH] Add simple docker file for B300 (#10944)

Signed-off-by: Hao Lu <14827759+hlu1@users.noreply.github.com>
---
 docker/Dockerfile.b300 | 55 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 docker/Dockerfile.b300

diff --git a/docker/Dockerfile.b300 b/docker/Dockerfile.b300
new file mode 100644
index 000000000..0c18ca47d
--- /dev/null
+++ b/docker/Dockerfile.b300
@@ -0,0 +1,55 @@
+FROM nvcr.io/nvidia/pytorch:25.08-py3 AS base
+
+ARG BRANCH_TYPE=remote
+
+# Python tools
+RUN python3 -m pip install --no-cache-dir \
+    datamodel_code_generator \
+    mooncake-transfer-engine==0.3.6.post1 \
+    pre-commit \
+    pytest \
+    black \
+    isort \
+    icdiff \
+    uv \
+    wheel \
+    scikit-build-core \
+    nixl \
+    py-spy
+
+FROM scratch AS local_src
+COPY . /src
+
+FROM base AS build-image
+WORKDIR /sgl-workspace
+ARG BRANCH_TYPE
+COPY --from=local_src /src /tmp/local_src
+RUN if [ "$BRANCH_TYPE" = "local" ]; then \
+        cp -r /tmp/local_src /sgl-workspace/sglang; \
+    else \
+        git clone --depth=1 https://github.com/sgl-project/sglang.git /sgl-workspace/sglang; \
+    fi \
+ && rm -rf /tmp/local_src
+
+# Modify source code to use existing torch
+# Remove after the next torch release
+RUN sed -i "/torch/d" sglang/sgl-kernel/pyproject.toml && \
+    sed -i -e "/torchaudio/d" \
+        -e "s/torch==2.8.0/torch==2.8.0a0+34c6371d24.nv25.8/" \
+        -e "s/torchao==0.9.0/torchao==0.12.0+git/" \
+        sglang/python/pyproject.toml
+
+# Necessary for cuda 13
+ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include/cccl
+
+# Make fa_4 run on B300
+ENV CUTE_DSL_ARCH=sm_100f
+
+RUN cd sglang/sgl-kernel/ && \
+    make build && \
+    cd .. && \
+    python3 -m pip install -e "python[all]"
+
+# Modify Triton source file to support cuda 13
+ENV TRITON_DIR=/usr/local/lib/python3.12/dist-packages/triton
+RUN grep -q 'if major >= 13:' ${TRITON_DIR}/backends/nvidia/compiler.py || bash -lc $'sed -i \'/^def ptx_get_version(cuda_version) -> int:/,/^[[:space:]]*raise RuntimeError/s/^\\([[:space:]]*\\)raise RuntimeError.*/\\1if major >= 13:\\n\\1    base_ptx = 90\\n\\1    return base_ptx + (major - 13) * 10 + minor\\n\\n\\1raise RuntimeError("Triton only support CUDA 10.0 or higher, but got CUDA version: " + cuda_version)/\' ${TRITON_DIR}/backends/nvidia/compiler.py'