Dedicated toml files for CPU/XPU (#10734)
This commit is contained in:
5
.github/workflows/pr-test-xeon.yml
vendored
5
.github/workflows/pr-test-xeon.yml
vendored
@@ -42,7 +42,10 @@ jobs:
|
||||
version=$(cat python/sglang/version.py | cut -d'"' -f2)
|
||||
tag=v${version}-xeon
|
||||
|
||||
docker build . -f docker/Dockerfile.xeon -t sglang_xeon --no-cache
|
||||
docker build \
|
||||
--build-arg SGLANG_REPO=${{ github.event.pull_request.head.repo.clone_url }} \
|
||||
--build-arg VER_SGLANG=${{ github.head_ref }} \
|
||||
. -f docker/Dockerfile.xeon -t sglang_xeon --no-cache
|
||||
|
||||
- name: Run container
|
||||
run: |
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
FROM ubuntu:24.04
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ARG SGLANG_REPO=https://github.com/sgl-project/sglang.git
|
||||
ARG VER_SGLANG=main
|
||||
|
||||
ARG VER_TORCH=2.7.1
|
||||
ARG VER_TORCHVISION=0.22.1
|
||||
ARG VER_TRITON=3.3.1
|
||||
@@ -20,7 +22,7 @@ RUN apt-get update && \
|
||||
|
||||
WORKDIR /sgl-workspace
|
||||
|
||||
RUN curl -fsSL -v -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/24.11.3-2/Miniforge3-24.11.3-2-Linux-x86_64.sh && \
|
||||
RUN curl -fsSL -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/25.3.1-0/Miniforge3-25.3.1-0-Linux-x86_64.sh && \
|
||||
bash miniforge.sh -b -p ./miniforge3 && \
|
||||
rm -f miniforge.sh && \
|
||||
. miniforge3/bin/activate && \
|
||||
@@ -33,13 +35,14 @@ ENV CONDA_PREFIX=/sgl-workspace/miniforge3
|
||||
RUN pip config set global.index-url https://download.pytorch.org/whl/cpu && \
|
||||
pip config set global.extra-index-url https://pypi.org/simple
|
||||
|
||||
RUN git clone https://github.com/sgl-project/sglang.git && \
|
||||
RUN git clone ${SGLANG_REPO} && \
|
||||
cd sglang && \
|
||||
rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml && \
|
||||
git checkout ${VER_SGLANG} && \
|
||||
pip install -e "python[all_cpu]" && \
|
||||
cd python && \
|
||||
cp pyproject_cpu.toml pyproject.toml && \
|
||||
pip install . && \
|
||||
pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall && \
|
||||
cd sgl-kernel && \
|
||||
cd ../sgl-kernel && \
|
||||
cp pyproject_cpu.toml pyproject.toml && \
|
||||
pip install .
|
||||
|
||||
|
||||
@@ -82,15 +82,16 @@ cd sglang
|
||||
git checkout <YOUR-DESIRED-VERSION>
|
||||
|
||||
# Use dedicated toml file
|
||||
cp python/pyproject_other.toml python/pyproject.toml
|
||||
cd python
|
||||
cp pyproject_cpu.toml pyproject.toml
|
||||
# Install SGLang's dependencies and build the main SGLang package
|
||||
pip install --upgrade pip setuptools
|
||||
conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl
|
||||
pip install -e "python[all_cpu]"
|
||||
pip install .
|
||||
pip install torch==2.7.1 torchvision==0.22.1 triton==3.3.1 --force-reinstall
|
||||
|
||||
# Build the CPU backend kernels
|
||||
cd sgl-kernel
|
||||
cd ../sgl-kernel
|
||||
cp pyproject_cpu.toml pyproject.toml
|
||||
pip install .
|
||||
|
||||
|
||||
123
python/pyproject_cpu.toml
Normal file
123
python/pyproject_cpu.toml
Normal file
@@ -0,0 +1,123 @@
|
||||
# https://docs.sglang.ai/platforms/cpu_server.html
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "sglang"
|
||||
version = "0.5.3rc0"
|
||||
description = "SGLang is a fast serving framework for large language models and vision language models."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
license = { file = "LICENSE" }
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"aiohttp",
|
||||
"anthropic>=0.20.0",
|
||||
"blobfile==3.0.0",
|
||||
"build",
|
||||
"compressed-tensors",
|
||||
"datasets",
|
||||
"decord",
|
||||
"einops",
|
||||
"fastapi",
|
||||
"hf_transfer",
|
||||
"huggingface_hub",
|
||||
"intel-openmp",
|
||||
"interegular",
|
||||
"IPython",
|
||||
"llguidance>=0.7.11,<0.8.0",
|
||||
"modelscope",
|
||||
"msgspec",
|
||||
"ninja",
|
||||
"numpy",
|
||||
"openai==1.99.1",
|
||||
"openai-harmony==0.0.4",
|
||||
"orjson",
|
||||
"outlines==0.1.11",
|
||||
"packaging",
|
||||
"partial_json_parser",
|
||||
"pillow",
|
||||
"prometheus-client>=0.20.0",
|
||||
"psutil",
|
||||
"pybase64",
|
||||
"pydantic",
|
||||
"python-multipart",
|
||||
"pyzmq>=25.1.2",
|
||||
"requests",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"setproctitle",
|
||||
"soundfile==0.13.1",
|
||||
"tiktoken",
|
||||
"timm==1.0.16",
|
||||
"torchao==0.9.0",
|
||||
"tqdm",
|
||||
"transformers==4.56.1",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"xgrammar==0.1.24",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
tracing = [
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-api",
|
||||
"opentelemetry-exporter-otlp",
|
||||
"opentelemetry-exporter-otlp-proto-grpc",
|
||||
]
|
||||
|
||||
test = [
|
||||
"accelerate",
|
||||
"expecttest",
|
||||
"jsonlines",
|
||||
"matplotlib",
|
||||
"pandas",
|
||||
"peft",
|
||||
"sentence_transformers",
|
||||
"pytest",
|
||||
"tabulate",
|
||||
]
|
||||
|
||||
dev = ["sglang", "sglang[test]"]
|
||||
|
||||
[project.urls]
|
||||
"Homepage" = "https://github.com/sgl-project/sglang"
|
||||
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"sglang" = [
|
||||
"srt/layers/moe/fused_moe_triton/configs/*/*.json",
|
||||
"srt/layers/quantization/configs/*.json",
|
||||
"srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
exclude = [
|
||||
"assets*",
|
||||
"benchmark*",
|
||||
"docs*",
|
||||
"dist*",
|
||||
"playground*",
|
||||
"scripts*",
|
||||
"tests*",
|
||||
]
|
||||
|
||||
[tool.wheel]
|
||||
exclude = [
|
||||
"assets*",
|
||||
"benchmark*",
|
||||
"docs*",
|
||||
"dist*",
|
||||
"playground*",
|
||||
"scripts*",
|
||||
"tests*",
|
||||
]
|
||||
|
||||
[tool.codespell]
|
||||
ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment"
|
||||
skip = "*.json,*.jsonl,*.patch,*.txt"
|
||||
123
python/pyproject_xpu.toml
Normal file
123
python/pyproject_xpu.toml
Normal file
@@ -0,0 +1,123 @@
|
||||
# XPU is not enabled in the public vLLM and torch wheels;
|
||||
# follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vLLM.
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "sglang"
|
||||
version = "0.5.3rc0"
|
||||
description = "SGLang is a fast serving framework for large language models and vision language models."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
license = { file = "LICENSE" }
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"aiohttp",
|
||||
"anthropic>=0.20.0",
|
||||
"blobfile==3.0.0",
|
||||
"build",
|
||||
"compressed-tensors",
|
||||
"datasets",
|
||||
"decord",
|
||||
"einops",
|
||||
"fastapi",
|
||||
"hf_transfer",
|
||||
"huggingface_hub",
|
||||
"interegular",
|
||||
"IPython",
|
||||
"llguidance>=0.7.11,<0.8.0",
|
||||
"modelscope",
|
||||
"msgspec",
|
||||
"ninja",
|
||||
"numpy",
|
||||
"openai==1.99.1",
|
||||
"openai-harmony==0.0.4",
|
||||
"orjson",
|
||||
"outlines==0.1.11",
|
||||
"packaging",
|
||||
"partial_json_parser",
|
||||
"pillow",
|
||||
"prometheus-client>=0.20.0",
|
||||
"psutil",
|
||||
"pybase64",
|
||||
"pydantic",
|
||||
"python-multipart",
|
||||
"pyzmq>=25.1.2",
|
||||
"requests",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"setproctitle",
|
||||
"soundfile==0.13.1",
|
||||
"tiktoken",
|
||||
"timm==1.0.16",
|
||||
"torchao==0.9.0",
|
||||
"tqdm",
|
||||
"transformers==4.56.1",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"xgrammar==0.1.24",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
tracing = [
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-api",
|
||||
"opentelemetry-exporter-otlp",
|
||||
"opentelemetry-exporter-otlp-proto-grpc",
|
||||
]
|
||||
|
||||
test = [
|
||||
"accelerate",
|
||||
"expecttest",
|
||||
"jsonlines",
|
||||
"matplotlib",
|
||||
"pandas",
|
||||
"peft",
|
||||
"sentence_transformers",
|
||||
"pytest",
|
||||
"tabulate",
|
||||
]
|
||||
|
||||
dev = ["sglang", "sglang[test]"]
|
||||
|
||||
[project.urls]
|
||||
"Homepage" = "https://github.com/sgl-project/sglang"
|
||||
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"sglang" = [
|
||||
"srt/layers/moe/fused_moe_triton/configs/*/*.json",
|
||||
"srt/layers/quantization/configs/*.json",
|
||||
"srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
exclude = [
|
||||
"assets*",
|
||||
"benchmark*",
|
||||
"docs*",
|
||||
"dist*",
|
||||
"playground*",
|
||||
"scripts*",
|
||||
"tests*",
|
||||
]
|
||||
|
||||
[tool.wheel]
|
||||
exclude = [
|
||||
"assets*",
|
||||
"benchmark*",
|
||||
"docs*",
|
||||
"dist*",
|
||||
"playground*",
|
||||
"scripts*",
|
||||
"tests*",
|
||||
]
|
||||
|
||||
[tool.codespell]
|
||||
ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment"
|
||||
skip = "*.json,*.jsonl,*.patch,*.txt"
|
||||
Reference in New Issue
Block a user