# sglang/python/pyproject.toml

# Build configuration: the package is built with the setuptools backend.
[build-system]
requires = [
    "setuptools>=61.0",
    "wheel",
]
build-backend = "setuptools.build_meta"

# Core package metadata.
[project]
name = "sglang"
version = "0.4.3"
description = "SGLang is yet another fast serving framework for large language models and vision language models."
readme = "README.md"
requires-python = ">=3.8"
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
# Dependencies installed unconditionally. The serving runtime (srt*) and the
# API-client integrations are opt-in extras declared under
# [project.optional-dependencies].
dependencies = [
    "requests",
    "tqdm",
    "numpy",
    "IPython",
    "setproctitle",
]

[project.optional-dependencies]
# Dependencies shared by every runtime ("srt*") extra below.
runtime_common = [
    "aiohttp",
    "decord",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "modelscope",
    "orjson",
    "packaging",
    "pillow",
    "prometheus-client>=0.20.0",
    "psutil",
    "pydantic",
    "python-multipart",
    "pyzmq>=25.1.2",
    "torchao>=0.7.0",
    "uvicorn",
    "uvloop",
    "xgrammar==0.1.10",
    "ninja",
]

# Runtime extra used by the unsuffixed "all" / "dev" extras. It is the only
# runtime extra that lists cuda-python and flashinfer_python.
srt = [
    "sglang[runtime_common]",
    "cuda-python",
    "sgl-kernel>=0.0.3.post6",
    "torch",
    "vllm>=0.6.4.post1,<=0.7.2",
    "flashinfer_python>=0.2.1.post1",
    "outlines>=0.0.44,<=0.1.11",
]

# HIP (Heterogeneous-computing Interface for Portability) for AMD.
# Base Docker image: rocm/vllm-dev:20241022; vllm is not taken from the public
# vllm wheel.
srt_hip = [
    "sglang[runtime_common]",
    "torch",
    "vllm==0.6.7.dev2",
    "outlines==0.1.11",
    "sgl-kernel>=0.0.3.post1",
]

# XPU is not enabled in the public vllm and torch wheels. To install vllm,
# follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html
srt_xpu = [
    "sglang[runtime_common]",
    "outlines>=0.0.44,<0.1.0",
]

# Intel Gaudi (device: hpu): follow the installation guide at
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = [
    "sglang[runtime_common]",
    "outlines>=0.0.44,<0.1.0",
]

# CPU: there are currently no pre-built vllm wheels for CPU.
# To install vllm for CPU, follow the instructions at
# https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
srt_cpu = [
    "sglang[runtime_common]",
    "torch",
    "outlines>=0.0.44,<0.1.0",
]

# Optional integrations. torch_memory_saver is not part of any "all*" or
# "dev*" aggregate below and has to be requested explicitly.
openai = ["openai>=1.0", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver"]

# Packages needed only for running the tests; pulled in through the "dev*" extras.
test = [
    "jsonlines",
    "matplotlib",
    "pandas",
    "sentence_transformers",
    "accelerate",
    "peft",
]

# Aggregate extras, one per hardware target: the matching runtime extra plus
# the openai, anthropic and litellm integrations.
all = [
    "sglang[srt]",
    "sglang[openai]",
    "sglang[anthropic]",
    "sglang[litellm]",
]
all_hip = [
    "sglang[srt_hip]",
    "sglang[openai]",
    "sglang[anthropic]",
    "sglang[litellm]",
]
all_xpu = [
    "sglang[srt_xpu]",
    "sglang[openai]",
    "sglang[anthropic]",
    "sglang[litellm]",
]
all_hpu = [
    "sglang[srt_hpu]",
    "sglang[openai]",
    "sglang[anthropic]",
    "sglang[litellm]",
]
all_cpu = [
    "sglang[srt_cpu]",
    "sglang[openai]",
    "sglang[anthropic]",
    "sglang[litellm]",
]

# Development extras: the matching "all*" aggregate plus the test dependencies.
dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
dev_cpu = ["sglang[all_cpu]", "sglang[test]"]

# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/sgl-project/sglang"
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"

# Non-Python files bundled with the "sglang" package: the JSON config files
# under the fused-MoE Triton and quantization layer directories.
[tool.setuptools.package-data]
sglang = [
    "srt/layers/moe/fused_moe_triton/configs/*.json",
    "srt/layers/quantization/configs/*.json",
]

# Automatic package discovery: package names matching these glob patterns are
# left out of the distribution.
[tool.setuptools.packages.find]
exclude = [
    "assets*",
    "benchmark*",
    "docs*",
    "dist*",
    "playground*",
    "scripts*",
    "tests*",
]

# Same exclude patterns as [tool.setuptools.packages.find].
# NOTE(review): [tool.wheel] does not appear to be a table that setuptools or
# wheel reads from pyproject.toml — confirm whether anything consumes it; if
# nothing does, the packages.find list is the one that takes effect.
[tool.wheel]
exclude = [
    "assets*",
    "benchmark*",
    "docs*",
    "dist*",
    "playground*",
    "scripts*",
    "tests*",
]
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00			`[build-system]`
			`requires = ["setuptools>=61.0", "wheel"]`
			`build-backend = "setuptools.build_meta"`

			`[project]`
			`name = "sglang"`
chore: bump v0.4.3 (#3556) 2025-02-14 09:43:14 +08:00			`version = "0.4.3"`
misc: update SGLang package description (#659) 2024-07-20 02:27:39 +10:00			`description = "SGLang is yet another fast serving framework for large language models and vision language models."`
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00			`readme = "README.md"`
			`requires-python = ">=3.8"`
Llama3.2 vision model support (#1551) 2024-10-21 15:01:21 -07:00			`license = { file = "LICENSE" }`
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00			`classifiers = [`
			`"Programming Language :: Python :: 3",`
			`"License :: OSI Approved :: Apache Software License",`
			`]`
nit: Remove busy waiting on scheduler (#2382) 2024-12-08 01:06:15 -08:00			`dependencies = ["requests", "tqdm", "numpy", "IPython", "setproctitle"]`
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00
			`[project.optional-dependencies]`
Misc fix for min_p_sampling, --cuda-graph-bs (#2761) 2025-01-07 02:52:53 -08:00			`runtime_common = [`
			`"aiohttp", "decord", "fastapi",`
[CI] Minor fix for CI (#2187) 2024-11-25 16:38:43 -08:00			`"hf_transfer", "huggingface_hub", "interegular", "modelscope",`
ROCm: bump 6.3.0 (#3259) 2025-02-02 12:13:40 -08:00			`"orjson", "packaging", "pillow", "prometheus-client>=0.20.0",`
			`"psutil", "pydantic", "python-multipart", "pyzmq>=25.1.2",`
fix apply_token_bitmask_inplace_cuda (#3594) 2025-02-15 23:55:08 +08:00			`"torchao>=0.7.0", "uvicorn", "uvloop", "xgrammar==0.1.10", "ninja"`
Misc fix for min_p_sampling, --cuda-graph-bs (#2761) 2025-01-07 02:52:53 -08:00			`]`
			`srt = [`
			`"sglang[runtime_common]", "cuda-python",`
chore: bump v0.4.3 (#3556) 2025-02-14 09:43:14 +08:00			`"sgl-kernel>=0.0.3.post6", "torch", "vllm>=0.6.4.post1,<=0.7.2",`
feat: support flashinfer mla attention for deepseek v3 (#3550) 2025-02-14 08:50:14 +08:00			`"flashinfer_python>=0.2.1.post1",`
			`"outlines>=0.0.44,<=0.1.11",`
Misc fix for min_p_sampling, --cuda-graph-bs (#2761) 2025-01-07 02:52:53 -08:00			`]`
[minor] Improve code style and compatibility (#1961) 2024-11-08 02:19:41 -08:00
[Build, ROCm] Dockerfile.rocm for Instinct GPUs, with package updates (#1861) 2024-10-31 16:38:16 -07:00			`# HIP (Heterogeneous-computing Interface for Portability) for AMD`
			`# => base docker rocm/vllm-dev:20241022, not from public vllm whl`
ROCm: sgl-kernel enablement starting with sgl_moe_align_block (#3287) 2025-02-04 05:44:44 -08:00			`srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.7.dev2", "outlines==0.1.11", "sgl-kernel>=0.0.3.post1"]`
[Feature, Hardware] Enable SGLang on XPU GPUs via PyTorch (#1480) 2024-10-13 02:10:32 +08:00			`# xpu is not enabled in public vllm and torch whl,`
			`# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm`
ROCm: bump 6.3.0 (#3259) 2025-02-02 12:13:40 -08:00			`srt_xpu = ["sglang[runtime_common]", "outlines>=0.0.44,<0.1.0"]`
Add initial support for intel Gaudi accelerators (#2121) 2024-11-23 09:52:23 +05:30			`#For Intel Gaudi(device : hpu) follow the installation guide`
			`#https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html`
ROCm: bump 6.3.0 (#3259) 2025-02-02 12:13:40 -08:00			`srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<0.1.0"]`
Enable CPU device on SGLang (#2806) 2025-01-17 13:22:53 +08:00			`# CPU: currently, there are no pre-built vllm wheels for CPU.`
			`# To install vllm for CPU, please follow the instruction here:`
			`# https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html`
ROCm: bump 6.3.0 (#3259) 2025-02-02 12:13:40 -08:00			`srt_cpu = ["sglang[runtime_common]", "torch", "outlines>=0.0.44,<0.1.0"]`
[Feature, Hardware] Enable SGLang on XPU GPUs via PyTorch (#1480) 2024-10-13 02:10:32 +08:00
Fix missing numpy dependency in pyproject.toml (#524) 2024-06-10 21:13:50 +02:00			`openai = ["openai>=1.0", "tiktoken"]`
			`anthropic = ["anthropic>=0.20.0"]`
Litellm Backend (#502) 2024-06-08 03:24:28 +08:00			`litellm = ["litellm>=1.0.0"]`
CUDA-graph-compatible releasing and resuming KV cache and model weight memory (#2630) 2025-01-14 03:38:51 +08:00			`torch_memory_saver = ["torch_memory_saver"]`
Llama3.2 vision model support (#1551) 2024-10-21 15:01:21 -07:00			`test = [`
			`"jsonlines",`
			`"matplotlib",`
			`"pandas",`
			`"sentence_transformers",`
			`"accelerate",`
			`"peft",`
			`]`
Litellm Backend (#502) 2024-06-08 03:24:28 +08:00			`all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]`
[Build, ROCm] Dockerfile.rocm for Instinct GPUs, with package updates (#1861) 2024-10-31 16:38:16 -07:00			`all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]`
[Feature, Hardware] Enable SGLang on XPU GPUs via PyTorch (#1480) 2024-10-13 02:10:32 +08:00			`all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]`
Add initial support for intel Gaudi accelerators (#2121) 2024-11-23 09:52:23 +05:30			`all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]`
Enable CPU device on SGLang (#2806) 2025-01-17 13:22:53 +08:00			`all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]`
[Minor] Improve code style (#2422) 2024-12-09 06:30:35 -08:00
latency test enhancement - final part (#921) 2024-08-04 18:15:23 -07:00			`dev = ["sglang[all]", "sglang[test]"]`
[Build, ROCm] Dockerfile.rocm for Instinct GPUs, with package updates (#1861) 2024-10-31 16:38:16 -07:00			`dev_hip = ["sglang[all_hip]", "sglang[test]"]`
[Feature, Hardware] Enable SGLang on XPU GPUs via PyTorch (#1480) 2024-10-13 02:10:32 +08:00			`dev_xpu = ["sglang[all_xpu]", "sglang[test]"]`
Add initial support for intel Gaudi accelerators (#2121) 2024-11-23 09:52:23 +05:30			`dev_hpu = ["sglang[all_hpu]", "sglang[test]"]`
Enable CPU device on SGLang (#2806) 2025-01-17 13:22:53 +08:00			`dev_cpu = ["sglang[all_cpu]", "sglang[test]"]`
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00
Fix test cases (#6) 2024-01-15 01:15:53 -08:00			`[project.urls]`
			`"Homepage" = "https://github.com/sgl-project/sglang"`
			`"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"`

fix: package data missing (#2521) 2024-12-27 00:16:48 +08:00			`[tool.setuptools.package-data]`
Release 0.4.1.post3 - upload the config.json to PyPI (#2647) 2024-12-29 14:25:53 -08:00			`"sglang" = ["srt/layers/moe/fused_moe_triton/configs/*.json", "srt/layers/quantization/configs/*.json"]`
fix: package data missing (#2521) 2024-12-27 00:16:48 +08:00
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00			`[tool.setuptools.packages.find]`
Llama3.2 vision model support (#1551) 2024-10-21 15:01:21 -07:00			`exclude = [`
			`"assets*",`
			`"benchmark*",`
			`"docs*",`
			`"dist*",`
			`"playground*",`
			`"scripts*",`
			`"tests*",`
			`]`
release initial code Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com> 2024-01-08 04:37:50 +00:00
			`[tool.wheel]`
Llama3.2 vision model support (#1551) 2024-10-21 15:01:21 -07:00			`exclude = [`
			`"assets*",`
			`"benchmark*",`
			`"docs*",`
			`"dist*",`
			`"playground*",`
			`"scripts*",`
			`"tests*",`
			`]`