feat: support pip install sglang (#10465)

2025-09-15 03:09:17 -07:00
parent 059c13de5c
commit 5afd036533
8 changed files with 269 additions and 133 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -10,131 +10,87 @@ readme = "README.md"
 requires-python = ">=3.10"
 license = { file = "LICENSE" }
 classifiers = [
-    "Programming Language :: Python :: 3",
-    "License :: OSI Approved :: Apache Software License",
+  "Programming Language :: Python :: 3",
+  "License :: OSI Approved :: Apache Software License",
+]
+dependencies = [
+  "aiohttp",
+  "requests",
+  "tqdm",
+  "numpy",
+  "IPython",
+  "setproctitle",
+  "blobfile==3.0.0",
+  "build",
+  "compressed-tensors",
+  "datasets",
+  "einops",
+  "fastapi",
+  "hf_transfer",
+  "huggingface_hub",
+  "interegular",
+  "llguidance>=0.7.11,<0.8.0",
+  "modelscope",
+  "msgspec",
+  "ninja",
+  "openai==1.99.1",
+  "openai-harmony==0.0.4",
+  "orjson",
+  "outlines==0.1.11",
+  "packaging",
+  "partial_json_parser",
+  "pillow",
+  "prometheus-client>=0.20.0",
+  "psutil",
+  "pybase64",
+  "pydantic",
+  "pynvml",
+  "python-multipart",
+  "pyzmq>=25.1.2",
+  "scipy",
+  "sentencepiece",
+  "soundfile==0.13.1",
+  "timm==1.0.16",
+  "tiktoken",
+  "torchao==0.9.0",
+  "transformers==4.56.1",
+  "uvicorn",
+  "uvloop",
+  "xgrammar==0.1.24",
+  "sgl-kernel==0.3.9.post2",
+  "torch==2.8.0",
+  "torchaudio==2.8.0",
+  "torchvision",
+  "cuda-python",
+  "flashinfer_python==0.3.1",
+  "openai==1.99.1",
+  "tiktoken",
+  "anthropic>=0.20.0",
+  "torch_memory_saver==0.0.8",
+  "decord",
 ]
-dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"]

 [project.optional-dependencies]
-runtime_common = [
-    "blobfile==3.0.0",
-    "build",
-    "compressed-tensors",
-    "datasets",
-    "einops",
-    "fastapi",
-    "hf_transfer",
-    "huggingface_hub",
-    "interegular",
-    "llguidance>=0.7.11,<0.8.0",
-    "modelscope",
-    "msgspec",
-    "ninja",
-    "openai==1.99.1",
-    "openai-harmony==0.0.4",
-    "orjson",
-    "outlines==0.1.11",
-    "packaging",
-    "partial_json_parser",
-    "pillow",
-    "prometheus-client>=0.20.0",
-    "psutil",
-    "pybase64",
-    "pydantic",
-    "pynvml",
-    "python-multipart",
-    "pyzmq>=25.1.2",
-    "scipy",
-    "sentencepiece",
-    "soundfile==0.13.1",
-    "timm==1.0.16",
-    "tiktoken",
-    "torchao==0.9.0",
-    "transformers==4.56.1",
-    "uvicorn",
-    "uvloop",
-    "xgrammar==0.1.24",
+test = [
+  "accelerate",
+  "expecttest",
+  "jsonlines",
+  "matplotlib",
+  "pandas",
+  "peft",
+  "sentence_transformers",
+  "pytest",
+  "tabulate",
 ]
-
 tracing = [
    "opentelemetry-sdk",
    "opentelemetry-api",
    "opentelemetry-exporter-otlp",
    "opentelemetry-exporter-otlp-proto-grpc",
 ]
-
-srt = [
-    "sglang[runtime_common]",
-    "sgl-kernel==0.3.9.post2",
-    "torch==2.8.0",
-    "torchaudio==2.8.0",
-    "torchvision",
-    "cuda-python",
-    "flashinfer_python==0.3.1",
-]
-
-blackwell = [
-    "sglang[runtime_common]",
-    "sgl-kernel==0.3.9.post2",
-    "torch==2.8.0",
-    "torchaudio==2.8.0",
-    "torchvision",
-    "cuda-python",
-    "flashinfer_python==0.3.1",
-    "nvidia-cutlass-dsl==4.1.0",
-]
-
-# HIP (Heterogeneous-computing Interface for Portability) for AMD
-# => base docker rocm/vllm-dev:20250114, not from public vllm whl
-srt_hip = [
-    "sglang[runtime_common]",
-    "torch",
-    "petit_kernel==0.0.2",
-    "wave-lang==3.7.0",
-]
-
-# https://docs.sglang.ai/platforms/cpu_server.html
-srt_cpu = ["sglang[runtime_common]", "intel-openmp"]
-
-# https://docs.sglang.ai/platforms/ascend_npu.html
-srt_npu = ["sglang[runtime_common]"]
-
-# xpu is not enabled in public vllm and torch whl,
-# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
-srt_xpu = ["sglang[runtime_common]"]
-
-# For Intel Gaudi(device : hpu) follow the installation guide
-# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
-srt_hpu = ["sglang[runtime_common]"]
-
-openai = ["openai==1.99.1", "tiktoken"]
-anthropic = ["anthropic>=0.20.0"]
-litellm = ["litellm>=1.0.0"]
-torch_memory_saver = ["torch_memory_saver==0.0.8"]
-decord = ["decord"]
-test = [
-    "accelerate",
-    "expecttest",
-    "jsonlines",
-    "matplotlib",
-    "pandas",
-    "peft",
-    "sentence_transformers",
-    "pytest",
-    "tabulate",
-]
-all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
-all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-
-dev = ["sglang[all]", "sglang[test]"]
-dev_hip = ["sglang[all_hip]", "sglang[test]"]
-dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
-dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
-dev_cpu = ["sglang[all_cpu]", "sglang[test]"]
+all = ["sglang[test]"]
+blackwell = ["nvidia-cutlass-dsl==4.1.0", "sglang[test]"]
+dev = ["sglang[test]"]

 [project.urls]
 "Homepage" = "https://github.com/sgl-project/sglang"
@@ -142,31 +98,31 @@ dev_cpu = ["sglang[all_cpu]", "sglang[test]"]

 [tool.setuptools.package-data]
 "sglang" = [
-    "srt/layers/moe/fused_moe_triton/configs/*/*.json",
-    "srt/layers/quantization/configs/*.json",
-    "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
+  "srt/layers/moe/fused_moe_triton/configs/*/*.json",
+  "srt/layers/quantization/configs/*.json",
+  "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
 ]

 [tool.setuptools.packages.find]
 exclude = [
-    "assets*",
-    "benchmark*",
-    "docs*",
-    "dist*",
-    "playground*",
-    "scripts*",
-    "tests*",
+  "assets*",
+  "benchmark*",
+  "docs*",
+  "dist*",
+  "playground*",
+  "scripts*",
+  "tests*",
 ]

 [tool.wheel]
 exclude = [
-    "assets*",
-    "benchmark*",
-    "docs*",
-    "dist*",
-    "playground*",
-    "scripts*",
-    "tests*",
+  "assets*",
+  "benchmark*",
+  "docs*",
+  "dist*",
+  "playground*",
+  "scripts*",
+  "tests*",
 ]

 [tool.codespell]
--- a/python/pyproject_other.toml
+++ b/python/pyproject_other.toml
@@ -0,0 +1,174 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "sglang"
+version = "0.5.2"
+description = "SGLang is a fast serving framework for large language models and vision language models."
+readme = "README.md"
+requires-python = ">=3.10"
+license = { file = "LICENSE" }
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+]
+dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"]
+
+[project.optional-dependencies]
+runtime_common = [
+    "blobfile==3.0.0",
+    "build",
+    "compressed-tensors",
+    "datasets",
+    "einops",
+    "fastapi",
+    "hf_transfer",
+    "huggingface_hub",
+    "interegular",
+    "llguidance>=0.7.11,<0.8.0",
+    "modelscope",
+    "msgspec",
+    "ninja",
+    "openai==1.99.1",
+    "openai-harmony==0.0.4",
+    "orjson",
+    "outlines==0.1.11",
+    "packaging",
+    "partial_json_parser",
+    "pillow",
+    "prometheus-client>=0.20.0",
+    "psutil",
+    "pybase64",
+    "pydantic",
+    "pynvml",
+    "python-multipart",
+    "pyzmq>=25.1.2",
+    "scipy",
+    "sentencepiece",
+    "soundfile==0.13.1",
+    "timm==1.0.16",
+    "tiktoken",
+    "torchao==0.9.0",
+    "transformers==4.56.1",
+    "uvicorn",
+    "uvloop",
+    "xgrammar==0.1.24",
+]
+
+tracing = [
+    "opentelemetry-sdk",
+    "opentelemetry-api",
+    "opentelemetry-exporter-otlp",
+    "opentelemetry-exporter-otlp-proto-grpc",
+]
+
+srt = [
+    "sglang[runtime_common]",
+    "sgl-kernel==0.3.9.post2",
+    "torch==2.8.0",
+    "torchaudio==2.8.0",
+    "torchvision",
+    "cuda-python",
+    "flashinfer_python==0.3.1",
+]
+
+blackwell = [
+    "sglang[runtime_common]",
+    "sgl-kernel==0.3.9.post2",
+    "torch==2.8.0",
+    "torchaudio==2.8.0",
+    "torchvision",
+    "cuda-python",
+    "flashinfer_python==0.3.1",
+    "nvidia-cutlass-dsl==4.1.0",
+]
+
+# HIP (Heterogeneous-computing Interface for Portability) for AMD
+# => base docker rocm/vllm-dev:20250114, not from public vllm whl
+srt_hip = [
+    "sglang[runtime_common]",
+    "torch",
+    "petit_kernel==0.0.2",
+    "wave-lang==3.7.0",
+]
+
+# https://docs.sglang.ai/platforms/cpu_server.html
+srt_cpu = ["sglang[runtime_common]", "intel-openmp"]
+
+# https://docs.sglang.ai/platforms/ascend_npu.html
+srt_npu = ["sglang[runtime_common]"]
+
+# xpu is not enabled in public vllm and torch whl,
+# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
+srt_xpu = ["sglang[runtime_common]"]
+
+# For Intel Gaudi (device: hpu), follow the installation guide:
+# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
+srt_hpu = ["sglang[runtime_common]"]
+
+openai = ["openai==1.99.1", "tiktoken"]
+anthropic = ["anthropic>=0.20.0"]
+litellm = ["litellm>=1.0.0"]
+torch_memory_saver = ["torch_memory_saver==0.0.8"]
+decord = ["decord"]
+test = [
+    "accelerate",
+    "expecttest",
+    "jsonlines",
+    "matplotlib",
+    "pandas",
+    "peft",
+    "sentence_transformers",
+    "pytest",
+    "tabulate",
+]
+all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
+all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+
+dev = ["sglang[all]", "sglang[test]"]
+dev_hip = ["sglang[all_hip]", "sglang[test]"]
+dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
+dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
+dev_cpu = ["sglang[all_cpu]", "sglang[test]"]
+
+[project.urls]
+"Homepage" = "https://github.com/sgl-project/sglang"
+"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"
+
+[tool.setuptools.package-data]
+"sglang" = [
+    "srt/layers/moe/fused_moe_triton/configs/*/*.json",
+    "srt/layers/quantization/configs/*.json",
+    "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
+]
+
+[tool.setuptools.packages.find]
+exclude = [
+    "assets*",
+    "benchmark*",
+    "docs*",
+    "dist*",
+    "playground*",
+    "scripts*",
+    "tests*",
+]
+
+[tool.wheel]
+exclude = [
+    "assets*",
+    "benchmark*",
+    "docs*",
+    "dist*",
+    "playground*",
+    "scripts*",
+    "tests*",
+]
+
+[tool.codespell]
+ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment"
+skip = "*.json,*.jsonl,*.patch,*.txt"