[Build, ROCm] Dockerfile.rocm for Instinct GPUs, with package updates (#1861)
This commit is contained in:
45
docker/Dockerfile.rocm
Normal file
45
docker/Dockerfile.rocm
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Usage (to build SGLang ROCm docker image):
#   docker build --build-arg SGL_BRANCH=v0.3.4.post2 -t testImage -f Dockerfile.rocm .
#
# Optional build arguments:
#   BASE_IMAGE  base image to build on (default: rocm/vllm-dev:20241022)
#   SGL_REPO    git URL the SGLang sources are cloned from
#   SGL_BRANCH  branch or tag to check out (default: the value of SGL_DEFAULT)
#   BUILD_TYPE  "srt" installs python[srt_hip]; any other value installs python[all_hip]

# default base image
ARG BASE_IMAGE="rocm/vllm-dev:20241022"

FROM $BASE_IMAGE AS base
USER root

WORKDIR /sgl-workspace

ARG SGL_REPO="https://github.com/sgl-project/sglang"
# SGL_DEFAULT is set with ENV, so it also remains visible in the final image's environment.
ENV SGL_DEFAULT="main"
ARG SGL_BRANCH=${SGL_DEFAULT}
# BUILD_TYPE must be declared here; an undeclared --build-arg is not consumed and
# never reaches the RUN shell, which made the "srt" branch below unreachable.
# The default keeps the previous behavior (install python[all_hip]).
ARG BUILD_TYPE="all"

# Clone SGLang, optionally switch to the requested branch/tag, then install it in
# editable mode with the HIP extras selected by BUILD_TYPE.
# All expansions are quoted so an empty or unusual build-arg cannot break the
# `[ ... ]` test or be word-split by the shell.
RUN git clone "${SGL_REPO}" \
    && cd sglang \
    && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
         echo "Using ${SGL_DEFAULT}, default branch."; \
       else \
         echo "Using ${SGL_BRANCH} branch."; \
         git checkout "${SGL_BRANCH}"; \
       fi \
    && if [ "${BUILD_TYPE}" = "srt" ]; then \
         python -m pip --no-cache-dir install -e "python[srt_hip]"; \
       else \
         python -m pip --no-cache-dir install -e "python[all_hip]"; \
       fi

# Keep a second copy of the checkout at /sglang.
RUN cp -r /sgl-workspace/sglang /sglang
# NOTE(review): the install above already uses --no-cache-dir, and a purge in its
# own layer cannot shrink earlier layers - confirm this step is still needed.
RUN python -m pip cache purge

# Performance environment variables.

ENV HIP_FORCE_DEV_KERNARG=1
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
ENV NCCL_MIN_NCHANNELS=112

ENV MOE_PADDING=1
ENV VLLM_FP8_PADDING=1
ENV VLLM_FP8_ACT_PADDING=1
ENV VLLM_FP8_WEIGHT_PADDING=1
ENV VLLM_FP8_REDUCE_CONV=1

# Default to an interactive shell; no server is started automatically.
CMD ["/bin/bash"]
|
||||||
@@ -20,9 +20,12 @@ runtime_common = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hu
|
|||||||
"orjson", "packaging", "pillow", "psutil", "pydantic", "python-multipart",
|
"orjson", "packaging", "pillow", "psutil", "pydantic", "python-multipart",
|
||||||
"torchao", "uvicorn", "uvloop", "zmq",
|
"torchao", "uvicorn", "uvloop", "zmq",
|
||||||
"outlines>=0.0.44", "modelscope"]
|
"outlines>=0.0.44", "modelscope"]
|
||||||
|
srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
|
||||||
|
# HIP (Heterogeneous-computing Interface for Portability) for AMD
|
||||||
|
# => base docker rocm/vllm-dev:20241022, not from public vllm whl
|
||||||
|
srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
|
||||||
# xpu is not enabled in public vllm and torch whl,
|
# xpu is not enabled in public vllm and torch whl,
|
||||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
||||||
srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
|
|
||||||
srt_xpu = ["sglang[runtime_common]"]
|
srt_xpu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
openai = ["openai>=1.0", "tiktoken"]
|
openai = ["openai>=1.0", "tiktoken"]
|
||||||
@@ -37,8 +40,10 @@ test = [
|
|||||||
"peft",
|
"peft",
|
||||||
]
|
]
|
||||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
|
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
dev = ["sglang[all]", "sglang[test]"]
|
dev = ["sglang[all]", "sglang[test]"]
|
||||||
|
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
||||||
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
|
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
|||||||
Reference in New Issue
Block a user