# Packaging configuration for the sglang Python distribution.
# Build requirements and the backend that pip/build invoke to produce
# the sdist and wheel.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core package metadata. `dependencies` is what a plain `pip install sglang`
# pulls in; anything heavier is declared as an optional extra.
[project]
name = "sglang"
version = "0.5.0rc2"
description = "SGLang is yet another fast serving framework for large language models and vision language models."
readme = "README.md"
requires-python = ">=3.10"
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"]

[project.optional-dependencies]
# Packages shared by the runtime extras in this table (srt, blackwell,
# srt_hip, srt_cpu, srt_xpu, srt_hpu, srt_npu); each of them pulls this list
# in through "sglang[runtime_common]".
runtime_common = [
    "blobfile==3.0.0",
    "build",
    "compressed-tensors",
    "datasets",
    "einops",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "llguidance>=0.7.11,<0.8.0",
    "modelscope",
    "msgspec",
    "ninja",
    "openai==1.99.1",
    "openai-harmony==0.0.4",
    "orjson",
    "outlines==0.1.11",
    "packaging",
    "partial_json_parser",
    "pillow",
    "prometheus-client>=0.20.0",
    "psutil",
    "pybase64",
    "pydantic",
    "pynvml",
    "python-multipart",
    "pyzmq>=25.1.2",
    "sentencepiece",
    "soundfile==0.13.1",
    "scipy",
    "timm==1.0.16",
    "tiktoken",
    "torchao==0.9.0",
    "transformers==4.55.2",
    "uvicorn",
    "uvloop",
    "xgrammar==0.1.23",
]

# Default runtime extra: the common runtime packages plus pinned
# sgl-kernel, torch/torchaudio and flashinfer builds.
srt = [
    "sglang[runtime_common]",
    "sgl-kernel==0.3.5",
    "torch==2.8.0",
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
    "flashinfer_python==0.2.11.post3",
]

# Same package set as `srt`, except that sgl-kernel carries no version pin.
# NOTE(review): `srt` pins sgl-kernel==0.3.5 — confirm the unpinned
# requirement here is intentional.
blackwell = [
    "sglang[runtime_common]",
    "sgl-kernel",
    "torch==2.8.0",
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
    "flashinfer_python==0.2.11.post3",
]

# HIP (Heterogeneous-computing Interface for Portability) for AMD
# => based on the rocm/vllm-dev:20250114 docker image, not on the public vllm wheel
srt_hip = [
    "sglang[runtime_common]",
    "torch",
    "petit_kernel==0.0.2",
    "wave-lang==1.0.1",
]

# CPU: the CPU torch wheel must be installed from https://download.pytorch.org/whl/cpu
# einops is not repeated here because runtime_common already requires it.
srt_cpu = ["sglang[runtime_common]"]

# XPU is not enabled in the public vllm and torch wheels; follow
# https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html
# to install vllm.
srt_xpu = ["sglang[runtime_common]"]

# For Intel Gaudi (device: hpu), follow the installation guide:
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]

# For NPU, follow the vllm-ascend installation guide:
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]"]

# Standalone single-purpose extras, combined by the all* extras.
# The openai pin matches the one listed in runtime_common; keep them in sync.
openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver==0.0.8"]
decord = ["decord"]
# Packages needed only by the test suite; combined with an all* extra by
# the dev* extras.
test = [
    "accelerate",
    "expecttest",
    "jsonlines",
    "matplotlib",
    "pandas",
    "peft",
    "sentence_transformers",
    "pytest",
]
# Convenience bundles: one runtime extra plus the openai, anthropic and
# decord extras. Only `all` additionally includes torch_memory_saver.
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]

# Development bundles: the matching all* extra plus the test extra.
dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
dev_cpu = ["sglang[all_cpu]", "sglang[test]"]
# Added for parity: all_npu exists, so NPU gets a dev bundle like every other backend.
dev_npu = ["sglang[all_npu]", "sglang[test]"]

# Project links published with the package metadata. "Bug Tracker" stays
# quoted because the label contains a space.
[project.urls]
Homepage = "https://github.com/sgl-project/sglang"
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"

# Non-Python files shipped inside the `sglang` package; patterns are
# relative to the package directory.
[tool.setuptools.package-data]
sglang = [
    "srt/layers/moe/fused_moe_triton/configs/*/*.json",
    "srt/layers/quantization/configs/*.json",
    "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
]

# Package discovery: skip top-level directories that are not part of the
# installable package.
[tool.setuptools.packages.find]
exclude = [
    "assets*",
    "benchmark*",
    "docs*",
    "dist*",
    "playground*",
    "scripts*",
    "tests*",
]

# Mirrors the exclude list of [tool.setuptools.packages.find].
# NOTE(review): it is unclear which tool, if any, reads [tool.wheel] —
# confirm this table is still needed.
[tool.wheel]
exclude = [
    "assets*",
    "benchmark*",
    "docs*",
    "dist*",
    "playground*",
    "scripts*",
    "tests*",
]

# codespell settings: words that must not be reported as misspellings, and
# file patterns that are not scanned at all.
[tool.codespell]
ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment"
skip = "*.json,*.jsonl,*.patch,*.txt"