diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
new file mode 100644
index 000000000..003434454
--- /dev/null
+++ b/docker/Dockerfile.rocm
@@ -0,0 +1,46 @@
+# Usage (to build SGLang ROCm docker image):
+# docker build --build-arg SGL_BRANCH=v0.3.4.post2 -t testImage -f Dockerfile.rocm .
+
+# default base image
+ARG BASE_IMAGE="rocm/vllm-dev:20241022"
+
+FROM $BASE_IMAGE AS base
+USER root
+
+WORKDIR /sgl-workspace
+
+ARG SGL_REPO="https://github.com/sgl-project/sglang"
+ENV SGL_DEFAULT="main"
+ARG SGL_BRANCH=${SGL_DEFAULT}
+ARG BUILD_TYPE=all
+
+RUN git clone ${SGL_REPO} \
+    && cd sglang \
+    && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
+         echo "Using ${SGL_DEFAULT}, default branch."; \
+       else \
+         echo "Using ${SGL_BRANCH} branch."; \
+         git checkout ${SGL_BRANCH}; \
+       fi \
+    && if [ "$BUILD_TYPE" = "srt" ]; then \
+         python -m pip --no-cache-dir install -e "python[srt_hip]"; \
+       else \
+         python -m pip --no-cache-dir install -e "python[all_hip]"; \
+       fi
+
+RUN cp -r /sgl-workspace/sglang /sglang
+RUN python -m pip cache purge
+
+# Performance environment variable.
+
+ENV HIP_FORCE_DEV_KERNARG=1
+ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
+ENV NCCL_MIN_NCHANNELS=112
+
+ENV MOE_PADDING=1
+ENV VLLM_FP8_PADDING=1
+ENV VLLM_FP8_ACT_PADDING=1
+ENV VLLM_FP8_WEIGHT_PADDING=1
+ENV VLLM_FP8_REDUCE_CONV=1
+
+CMD ["/bin/bash"]
diff --git a/python/pyproject.toml b/python/pyproject.toml
index fc4a62b7a..d9749e1ab 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -20,9 +20,12 @@ runtime_common = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hu
     "orjson", "packaging", "pillow", "psutil", "pydantic", "python-multipart",
     "torchao", "uvicorn", "uvloop", "zmq", "outlines>=0.0.44", "modelscope"]
 
+srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
+# HIP (Heterogeneous-computing Interface for Portability) for AMD
+# => base docker rocm/vllm-dev:20241022, not from public vllm whl
+srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
 # xpu is not enabled in public vllm and torch whl,
 # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
-srt = ["sglang[runtime_common]", "torch", "vllm==0.6.3.post1"]
 srt_xpu = ["sglang[runtime_common]"]
 
 openai = ["openai>=1.0", "tiktoken"]
@@ -37,8 +40,10 @@ test = [
     "peft",
 ]
 all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
+all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
 all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
 dev = ["sglang[all]", "sglang[test]"]
+dev_hip = ["sglang[all_hip]", "sglang[test]"]
 dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
 
 [project.urls]