diff --git a/README.md b/README.md
index 7b79c62c2..90280f99f 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,9 @@
 pip install -e "python[all]"
 pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
 ```
+### Method 3: Using Docker
+The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
+
 ### Notes
 - If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html).
 - If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..3f2e87008
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,6 @@
+FROM vllm/vllm-openai
+
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir "sglang[all]"
+RUN pip uninstall -y triton triton-nightly && pip install --no-cache-dir --no-deps --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
+RUN pip install --no-cache-dir flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/