[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "sglang"
version = "0.5.0rc0"
description = "SGLang is yet another fast serving framework for large language models and vision language models."
readme = "README.md"
requires-python = ">=3.10"
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
# Minimal core deps; heavy runtime deps live in optional-dependencies below.
dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"]
[project.optional-dependencies]
# Shared runtime deps for all serving backends (pulled in via sglang[runtime_common]).
runtime_common = [
    "blobfile==3.0.0",
    "build",
    "compressed-tensors",
    "datasets",
    "einops",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "llguidance>=0.7.11,<0.8.0",
    "modelscope",
    "msgspec",
    "ninja",
    "openai==1.99.1",
    "openai-harmony==0.0.3",
    "orjson",
    "outlines==0.1.11",
    "packaging",
    "partial_json_parser",
    "pillow",
    "prometheus-client>=0.20.0",
    "psutil",
    "pydantic",
    "pynvml",
    "pybase64",
    "python-multipart",
    "pyzmq>=25.1.2",
    "sentencepiece",
    "soundfile==0.13.1",
    "scipy",
    "timm==1.0.16",
    "tiktoken",
    "torchao==0.9.0",
    "transformers==4.55.0",
    "uvicorn",
    "uvloop",
    "xgrammar==0.1.22",
]

# Default CUDA serving runtime.
srt = [
    "sglang[runtime_common]",
    "sgl-kernel==0.3.4",
    "torch==2.8.0",
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
    "flashinfer_python==0.2.11.post1",
]

# NVIDIA Blackwell: same stack as srt but with an unpinned sgl-kernel.
blackwell = [
    "sglang[runtime_common]",
    "sgl-kernel",
    "torch==2.8.0",
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
    "flashinfer_python==0.2.11.post1",
]

# HIP (Heterogeneous-computing Interface for Portability) for AMD
# => base docker rocm/vllm-dev:20250114, not from public vllm whl
srt_hip = [
    "sglang[runtime_common]",
    "torch",
    "petit_kernel==0.0.2",
    "wave-lang==1.0.1",
]

# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
srt_cpu = ["sglang[runtime_common]", "einops"]

# xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
srt_xpu = ["sglang[runtime_common]"]

# For Intel Gaudi(device : hpu) follow the installation guide
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]

# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]"]

# Frontend-only client extras.
openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver==0.0.8"]
decord = ["decord"]

test = [
    "accelerate",
    "expecttest",
    "jsonlines",
    "matplotlib",
    "pandas",
    "peft",
    "sentence_transformers",
    "pytest",
]

# Aggregate extras, one per hardware backend.
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]

# Developer bundles: aggregate extra + test deps, per backend.
dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
dev_cpu = ["sglang[all_cpu]", "sglang[test]"]
[project.urls]
"Homepage" = "https://github.com/sgl-project/sglang"
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"
# Non-Python files that must ship inside the wheel.
[tool.setuptools.package-data]
"sglang" = [
    "srt/layers/moe/fused_moe_triton/configs/*/*.json",
    "srt/layers/quantization/configs/*.json",
    "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
]
# Exclude non-package directories from automatic package discovery.
[tool.setuptools.packages.find]
exclude = [
    "assets*",
    "benchmark*",
    "docs*",
    "dist*",
    "playground*",
    "scripts*",
    "tests*",
]
# NOTE(review): `[tool.wheel]` is not a table consumed by setuptools/wheel's
# standard config; it mirrors the packages.find excludes above. Verify a tool
# actually reads this before relying on it.
[tool.wheel]
exclude = [
    "assets*",
    "benchmark*",
    "docs*",
    "dist*",
    "playground*",
    "scripts*",
    "tests*",
]
[tool.codespell]
# Tokens codespell would flag but are intentional here (abbreviations,
# identifiers); the list is matched verbatim, so do not reformat it.
ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment"
# Skip data/fixture files where "typos" are expected content.
skip = "*.json,*.jsonl,*.patch,*.txt"