[Build, ROCm] Dockerfile.rocm for Instinct GPUs, with package updates (#1861)
This commit is contained in:
45
docker/Dockerfile.rocm
Normal file
45
docker/Dockerfile.rocm
Normal file
@@ -0,0 +1,45 @@
|
||||
# syntax=docker/dockerfile:1
# Usage (to build SGLang ROCm docker image):
#   docker build --build-arg SGL_BRANCH=v0.3.4.post2 -t sglang-rocm -f Dockerfile.rocm .
# (image tags must be lowercase: a tag like "testImage" is rejected by docker build)

# Default base image: ROCm vLLM dev image, which already ships a HIP build
# of torch/vllm matching the pinned "vllm==0.6.3.dev13" extra.
ARG BASE_IMAGE="rocm/vllm-dev:20241022"

FROM $BASE_IMAGE AS base
USER root

WORKDIR /sgl-workspace

# Source repository and branch selection.
# SGL_DEFAULT stays an ENV (not ARG) so the built image records which
# default branch it was produced from.
ARG SGL_REPO="https://github.com/sgl-project/sglang"
ENV SGL_DEFAULT="main"
ARG SGL_BRANCH=${SGL_DEFAULT}

# FIX: BUILD_TYPE was referenced in the RUN below but never declared, so
# Docker never substituted it and the shell saw an empty variable — the
# "srt" install path was unreachable and every build installed all_hip.
# Declaring it with default "all" keeps the previous effective behavior
# while making `--build-arg BUILD_TYPE=srt` actually work.
ARG BUILD_TYPE="all"

# Clone SGLang, check out the requested branch/tag when it differs from the
# default, and install the HIP extras in editable mode.
# Both sides of the branch comparison are quoted so an unusual value cannot
# break the `[ ... ]` test expression.
RUN git clone ${SGL_REPO} \
 && cd sglang \
 && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
      echo "Using ${SGL_DEFAULT}, default branch."; \
    else \
      echo "Using ${SGL_BRANCH} branch."; \
      git checkout ${SGL_BRANCH}; \
    fi \
 && if [ "${BUILD_TYPE}" = "srt" ]; then \
      python -m pip --no-cache-dir install -e "python[srt_hip]"; \
    else \
      python -m pip --no-cache-dir install -e "python[all_hip]"; \
    fi

# Keep a copy of the source tree at a stable path for downstream images.
RUN cp -r /sgl-workspace/sglang /sglang

# Drop any wheels cached by transitive builds (pip itself ran --no-cache-dir,
# but dependency build steps may still have populated the cache).
RUN python -m pip cache purge

# Performance environment variables (ROCm/HIP tuning).
ENV HIP_FORCE_DEV_KERNARG=1
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
ENV NCCL_MIN_NCHANNELS=112

# FP8 / MoE padding knobs — presumably consumed by the bundled vLLM HIP
# build in the base image; verify against that image before changing.
ENV MOE_PADDING=1
ENV VLLM_FP8_PADDING=1
ENV VLLM_FP8_ACT_PADDING=1
ENV VLLM_FP8_WEIGHT_PADDING=1
ENV VLLM_FP8_REDUCE_CONV=1

CMD ["/bin/bash"]
|
||||
@@ -20,9 +20,12 @@ runtime_common = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hu
|
||||
"orjson", "packaging", "pillow", "psutil", "pydantic", "python-multipart",
|
||||
"torchao", "uvicorn", "uvloop", "zmq",
|
||||
"outlines>=0.0.44", "modelscope"]
|
||||
srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
|
||||
# HIP (Heterogeneous-computing Interface for Portability) for AMD
|
||||
# => base docker rocm/vllm-dev:20241022, not from public vllm whl
|
||||
srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
|
||||
# xpu is not enabled in public vllm and torch whl,
|
||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
||||
srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
|
||||
srt_xpu = ["sglang[runtime_common]"]
|
||||
|
||||
openai = ["openai>=1.0", "tiktoken"]
|
||||
@@ -37,8 +40,10 @@ test = [
|
||||
"peft",
|
||||
]
|
||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||
dev = ["sglang[all]", "sglang[test]"]
|
||||
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
||||
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
|
||||
|
||||
[project.urls]
|
||||
|
||||
Reference in New Issue
Block a user