From 5afd0365334c113bdfb295687e353c4001f8f2e6 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Mon, 15 Sep 2025 03:09:17 -0700 Subject: [PATCH] feat: support pip install sglang (#10465) --- .github/workflows/pr-test-xeon.yml | 1 + docker/Dockerfile.npu | 2 +- docker/Dockerfile.rocm | 1 + docker/Dockerfile.xeon | 1 + python/pyproject.toml | 220 ++++++++++-------------- python/pyproject_other.toml | 174 +++++++++++++++++++ scripts/ci/amd_ci_install_dependency.sh | 2 + scripts/ci/npu_ci_install_dependency.sh | 1 + 8 files changed, 269 insertions(+), 133 deletions(-) create mode 100755 python/pyproject_other.toml diff --git a/.github/workflows/pr-test-xeon.yml b/.github/workflows/pr-test-xeon.yml index 9ba9be94b..d445a9a89 100644 --- a/.github/workflows/pr-test-xeon.yml +++ b/.github/workflows/pr-test-xeon.yml @@ -58,6 +58,7 @@ jobs: docker exec ci_sglang_xeon bash -c "python3 -m pip install --upgrade pip" docker exec ci_sglang_xeon pip uninstall sgl-kernel -y || true docker exec -w /sglang-checkout/sgl-kernel ci_sglang_xeon bash -c "cp pyproject_cpu.toml pyproject.toml && pip install -v ." 
+ docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c "rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml" docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c "pip install -e "python[dev_cpu]"" - name: Check AMX support diff --git a/docker/Dockerfile.npu b/docker/Dockerfile.npu index 01f9cf7e7..df9235607 100644 --- a/docker/Dockerfile.npu +++ b/docker/Dockerfile.npu @@ -77,7 +77,7 @@ RUN pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --inde # Install SGLang RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \ - (cd sglang/python && pip install -v .[srt_npu] --no-cache-dir) && \ + (cd sglang/python && rm -rf pyproject.toml && mv pyproject_other.toml pyproject.toml && pip install -v .[srt_npu] --no-cache-dir) && \ (cd sglang/sgl-router && python -m build && pip install --force-reinstall dist/*.whl) && \ rm -rf sglang diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 4ed5c37d9..3825fb961 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -181,6 +181,7 @@ RUN git clone ${SGL_REPO} \ && mv pyproject_rocm.toml pyproject.toml \ && AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \ && cd .. 
\ + && rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml \ && if [ "$BUILD_TYPE" = "srt" ]; then \ python -m pip --no-cache-dir install -e "python[srt_hip]" ${NO_DEPS_FLAG}; \ else \ diff --git a/docker/Dockerfile.xeon b/docker/Dockerfile.xeon index fdc439b30..f5fe8511a 100644 --- a/docker/Dockerfile.xeon +++ b/docker/Dockerfile.xeon @@ -35,6 +35,7 @@ RUN pip config set global.index-url https://download.pytorch.org/whl/cpu && \ RUN git clone https://github.com/sgl-project/sglang.git && \ cd sglang && \ git checkout ${VER_SGLANG} && \ + rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml && \ pip install -e "python[all_cpu]" && \ pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall && \ diff --git a/python/pyproject.toml b/python/pyproject.toml index 403f68143..1b9d47483 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -10,131 +10,87 @@ readme = "README.md" requires-python = ">=3.10" license = { file = "LICENSE" } classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", +] +dependencies = [ + "aiohttp", + "requests", + "tqdm", + "numpy", + "IPython", + "setproctitle", + "blobfile==3.0.0", + "build", + "compressed-tensors", + "datasets", + "einops", + "fastapi", + "hf_transfer", + "huggingface_hub", + "interegular", + "llguidance>=0.7.11,<0.8.0", + "modelscope", + "msgspec", + "ninja", + "openai==1.99.1", + "openai-harmony==0.0.4", + "orjson", + "outlines==0.1.11", + "packaging", + "partial_json_parser", + "pillow", + "prometheus-client>=0.20.0", + "psutil", + "pybase64", + "pydantic", + "pynvml", + "python-multipart", + "pyzmq>=25.1.2", + "scipy", + "sentencepiece", + "soundfile==0.13.1", + "timm==1.0.16", + "tiktoken", + "torchao==0.9.0", + "transformers==4.56.1", + "uvicorn", +
"uvloop", + "xgrammar==0.1.24", + "sgl-kernel==0.3.9.post2", + "torch==2.8.0", + "torchaudio==2.8.0", + "torchvision", + "cuda-python", + "flashinfer_python==0.3.1", + "openai==1.99.1", + "tiktoken", + "anthropic>=0.20.0", + "torch_memory_saver==0.0.8", + "decord", ] -dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"] [project.optional-dependencies] -runtime_common = [ - "blobfile==3.0.0", - "build", - "compressed-tensors", - "datasets", - "einops", - "fastapi", - "hf_transfer", - "huggingface_hub", - "interegular", - "llguidance>=0.7.11,<0.8.0", - "modelscope", - "msgspec", - "ninja", - "openai==1.99.1", - "openai-harmony==0.0.4", - "orjson", - "outlines==0.1.11", - "packaging", - "partial_json_parser", - "pillow", - "prometheus-client>=0.20.0", - "psutil", - "pybase64", - "pydantic", - "pynvml", - "python-multipart", - "pyzmq>=25.1.2", - "scipy", - "sentencepiece", - "soundfile==0.13.1", - "timm==1.0.16", - "tiktoken", - "torchao==0.9.0", - "transformers==4.56.1", - "uvicorn", - "uvloop", - "xgrammar==0.1.24", +test = [ + "accelerate", + "expecttest", + "jsonlines", + "matplotlib", + "pandas", + "peft", + "sentence_transformers", + "pytest", + "tabulate", ] - tracing = [ "opentelemetry-sdk", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-exporter-otlp-proto-grpc", ] - -srt = [ - "sglang[runtime_common]", - "sgl-kernel==0.3.9.post2", - "torch==2.8.0", - "torchaudio==2.8.0", - "torchvision", - "cuda-python", - "flashinfer_python==0.3.1", -] - -blackwell = [ - "sglang[runtime_common]", - "sgl-kernel==0.3.9.post2", - "torch==2.8.0", - "torchaudio==2.8.0", - "torchvision", - "cuda-python", - "flashinfer_python==0.3.1", - "nvidia-cutlass-dsl==4.1.0", -] - -# HIP (Heterogeneous-computing Interface for Portability) for AMD -# => base docker rocm/vllm-dev:20250114, not from public vllm whl -srt_hip = [ - "sglang[runtime_common]", - "torch", - "petit_kernel==0.0.2", - "wave-lang==3.7.0", -] - -# 
https://docs.sglang.ai/platforms/cpu_server.html -srt_cpu = ["sglang[runtime_common]", "intel-openmp"] - -# https://docs.sglang.ai/platforms/ascend_npu.html -srt_npu = ["sglang[runtime_common]"] - -# xpu is not enabled in public vllm and torch whl, -# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm -srt_xpu = ["sglang[runtime_common]"] - -# For Intel Gaudi(device : hpu) follow the installation guide -# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html -srt_hpu = ["sglang[runtime_common]"] - -openai = ["openai==1.99.1", "tiktoken"] -anthropic = ["anthropic>=0.20.0"] -litellm = ["litellm>=1.0.0"] -torch_memory_saver = ["torch_memory_saver==0.0.8"] -decord = ["decord"] -test = [ - "accelerate", - "expecttest", - "jsonlines", - "matplotlib", - "pandas", - "peft", - "sentence_transformers", - "pytest", - "tabulate", -] -all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"] -all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] -all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] -all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] -all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] -all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] - -dev = ["sglang[all]", "sglang[test]"] -dev_hip = ["sglang[all_hip]", "sglang[test]"] -dev_xpu = ["sglang[all_xpu]", "sglang[test]"] -dev_hpu = ["sglang[all_hpu]", "sglang[test]"] -dev_cpu = ["sglang[all_cpu]", "sglang[test]"] +all = ["sglang[test]"] +blackwell = ["nvidia-cutlass-dsl==4.1.0", "sglang[test]"] +dev = ["sglang[test]"] [project.urls] "Homepage" = "https://github.com/sgl-project/sglang" @@ -142,31 +98,31 @@ dev_cpu = ["sglang[all_cpu]", "sglang[test]"] [tool.setuptools.package-data] "sglang" = [ - 
"srt/layers/moe/fused_moe_triton/configs/*/*.json", - "srt/layers/quantization/configs/*.json", - "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp", + "srt/layers/moe/fused_moe_triton/configs/*/*.json", + "srt/layers/quantization/configs/*.json", + "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp", ] [tool.setuptools.packages.find] exclude = [ - "assets*", - "benchmark*", - "docs*", - "dist*", - "playground*", - "scripts*", - "tests*", + "assets*", + "benchmark*", + "docs*", + "dist*", + "playground*", + "scripts*", + "tests*", ] [tool.wheel] exclude = [ - "assets*", - "benchmark*", - "docs*", - "dist*", - "playground*", - "scripts*", - "tests*", + "assets*", + "benchmark*", + "docs*", + "dist*", + "playground*", + "scripts*", + "tests*", ] [tool.codespell] diff --git a/python/pyproject_other.toml b/python/pyproject_other.toml new file mode 100755 index 000000000..403f68143 --- /dev/null +++ b/python/pyproject_other.toml @@ -0,0 +1,174 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "sglang" +version = "0.5.2" +description = "SGLang is a fast serving framework for large language models and vision language models." 
+readme = "README.md" +requires-python = ">=3.10" +license = { file = "LICENSE" } +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", +] +dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"] + +[project.optional-dependencies] +runtime_common = [ + "blobfile==3.0.0", + "build", + "compressed-tensors", + "datasets", + "einops", + "fastapi", + "hf_transfer", + "huggingface_hub", + "interegular", + "llguidance>=0.7.11,<0.8.0", + "modelscope", + "msgspec", + "ninja", + "openai==1.99.1", + "openai-harmony==0.0.4", + "orjson", + "outlines==0.1.11", + "packaging", + "partial_json_parser", + "pillow", + "prometheus-client>=0.20.0", + "psutil", + "pybase64", + "pydantic", + "pynvml", + "python-multipart", + "pyzmq>=25.1.2", + "scipy", + "sentencepiece", + "soundfile==0.13.1", + "timm==1.0.16", + "tiktoken", + "torchao==0.9.0", + "transformers==4.56.1", + "uvicorn", + "uvloop", + "xgrammar==0.1.24", +] + +tracing = [ + "opentelemetry-sdk", + "opentelemetry-api", + "opentelemetry-exporter-otlp", + "opentelemetry-exporter-otlp-proto-grpc", +] + +srt = [ + "sglang[runtime_common]", + "sgl-kernel==0.3.9.post2", + "torch==2.8.0", + "torchaudio==2.8.0", + "torchvision", + "cuda-python", + "flashinfer_python==0.3.1", +] + +blackwell = [ + "sglang[runtime_common]", + "sgl-kernel==0.3.9.post2", + "torch==2.8.0", + "torchaudio==2.8.0", + "torchvision", + "cuda-python", + "flashinfer_python==0.3.1", + "nvidia-cutlass-dsl==4.1.0", +] + +# HIP (Heterogeneous-computing Interface for Portability) for AMD +# => base docker rocm/vllm-dev:20250114, not from public vllm whl +srt_hip = [ + "sglang[runtime_common]", + "torch", + "petit_kernel==0.0.2", + "wave-lang==3.7.0", +] + +# https://docs.sglang.ai/platforms/cpu_server.html +srt_cpu = ["sglang[runtime_common]", "intel-openmp"] + +# https://docs.sglang.ai/platforms/ascend_npu.html +srt_npu = ["sglang[runtime_common]"] + +# xpu is not enabled in public 
vllm and torch whl, +# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm +srt_xpu = ["sglang[runtime_common]"] + +# For Intel Gaudi(device : hpu) follow the installation guide +# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html +srt_hpu = ["sglang[runtime_common]"] + +openai = ["openai==1.99.1", "tiktoken"] +anthropic = ["anthropic>=0.20.0"] +litellm = ["litellm>=1.0.0"] +torch_memory_saver = ["torch_memory_saver==0.0.8"] +decord = ["decord"] +test = [ + "accelerate", + "expecttest", + "jsonlines", + "matplotlib", + "pandas", + "peft", + "sentence_transformers", + "pytest", + "tabulate", +] +all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"] +all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] +all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] +all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] +all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] +all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"] + +dev = ["sglang[all]", "sglang[test]"] +dev_hip = ["sglang[all_hip]", "sglang[test]"] +dev_xpu = ["sglang[all_xpu]", "sglang[test]"] +dev_hpu = ["sglang[all_hpu]", "sglang[test]"] +dev_cpu = ["sglang[all_cpu]", "sglang[test]"] + +[project.urls] +"Homepage" = "https://github.com/sgl-project/sglang" +"Bug Tracker" = "https://github.com/sgl-project/sglang/issues" + +[tool.setuptools.package-data] +"sglang" = [ + "srt/layers/moe/fused_moe_triton/configs/*/*.json", + "srt/layers/quantization/configs/*.json", + "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp", +] + +[tool.setuptools.packages.find] +exclude = [ + "assets*", + "benchmark*", + "docs*", + "dist*", + "playground*", + "scripts*", + "tests*", +] + +[tool.wheel] +exclude = [ + "assets*", + "benchmark*", + "docs*", +
"dist*", + "playground*", + "scripts*", + "tests*", +] + +[tool.codespell] +ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment" +skip = "*.json,*.jsonl,*.patch,*.txt" diff --git a/scripts/ci/amd_ci_install_dependency.sh b/scripts/ci/amd_ci_install_dependency.sh index 518f0dde9..98bccd7cd 100755 --- a/scripts/ci/amd_ci_install_dependency.sh +++ b/scripts/ci/amd_ci_install_dependency.sh @@ -19,6 +19,7 @@ docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.to case "${GPU_ARCH}" in mi35x) echo "Runner uses ${GPU_ARCH}; will fetch mi35x image." + docker exec ci_sglang bash -c "rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml" docker exec ci_sglang pip install -e "python[dev_hip]" --no-deps # TODO: only for mi35x # For lmms_evals evaluating MMMU docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git @@ -26,6 +27,7 @@ case "${GPU_ARCH}" in ;; mi30x|mi300|mi325) echo "Runner uses ${GPU_ARCH}; will fetch mi30x image." + docker exec ci_sglang bash -c "rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml" docker exec ci_sglang pip install -e "python[dev_hip]" # For lmms_evals evaluating MMMU docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git diff --git a/scripts/ci/npu_ci_install_dependency.sh b/scripts/ci/npu_ci_install_dependency.sh index 97c7dba1b..4246bb419 100755 --- a/scripts/ci/npu_ci_install_dependency.sh +++ b/scripts/ci/npu_ci_install_dependency.sh @@ -64,4 +64,5 @@ git clone --depth 1 https://github.com/sgl-project/sgl-kernel-npu.git --branch $ ### Install SGLang +rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml ${PIP_INSTALL} -v -e "python[srt_npu]"