setup router python binding ci (#1999)
This commit is contained in:
104
.github/workflows/release-pypi-router.yml
vendored
Normal file
104
.github/workflows/release-pypi-router.yml
vendored
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
# Reference: https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1
|
||||||
|
|
||||||
|
name: Release SGLang Router to PyPI
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
name: Build on ${{ matrix.os }} (${{ matrix.target }})
|
||||||
|
runs-on: ${{ matrix.os }}-latest
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- os: ubuntu
|
||||||
|
target: x86_64
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
path: sglang-repo
|
||||||
|
|
||||||
|
- name: Move rust folder to root and delete sglang-repo
|
||||||
|
run: |
|
||||||
|
mv sglang-repo/rust/* .
|
||||||
|
rm -rf sglang-repo
|
||||||
|
ls -alt
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.11"
|
||||||
|
|
||||||
|
- name: Install build dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install -U pip
|
||||||
|
python -m pip install build twine auditwheel
|
||||||
|
|
||||||
|
- name: Build package
|
||||||
|
uses: pypa/cibuildwheel@v2.21.3
|
||||||
|
env:
|
||||||
|
CIBW_BUILD: "cp38-manylinux_x86_64 cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64"
|
||||||
|
CIBW_BEFORE_ALL: |
|
||||||
|
yum update && yum install -y openssl-devel && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
|
CIBW_ENVIRONMENT: "PATH=$HOME/.cargo/bin:$PATH"
|
||||||
|
|
||||||
|
- name: List built packages
|
||||||
|
run: ls -lh wheelhouse/
|
||||||
|
|
||||||
|
- name: Check packages
|
||||||
|
run: twine check --strict wheelhouse/*
|
||||||
|
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: packages-${{ matrix.os }}-${{ matrix.target }}
|
||||||
|
path: wheelhouse/
|
||||||
|
|
||||||
|
build-sdist:
|
||||||
|
name: Build SDist
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
path: sglang-repo
|
||||||
|
|
||||||
|
- name: Move rust folder to root and delete sglang-repo
|
||||||
|
run: |
|
||||||
|
mv sglang-repo/rust/* .
|
||||||
|
rm -rf sglang-repo
|
||||||
|
ls -alt
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.11"
|
||||||
|
|
||||||
|
- name: Build SDist
|
||||||
|
run: |
|
||||||
|
pip install build
|
||||||
|
python -m build --sdist
|
||||||
|
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: sdist
|
||||||
|
path: dist/*.tar.gz
|
||||||
|
|
||||||
|
upload:
|
||||||
|
name: Upload to PyPI
|
||||||
|
needs: [build, build-sdist]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
path: dist
|
||||||
|
merge-multiple: true
|
||||||
|
|
||||||
|
- name: Upload to PyPI
|
||||||
|
env:
|
||||||
|
TWINE_USERNAME: __token__
|
||||||
|
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
||||||
|
run: |
|
||||||
|
pip install twine
|
||||||
|
twine upload dist/* --verbose
|
||||||
2
rust/Cargo.lock
generated
2
rust/Cargo.lock
generated
@@ -2091,7 +2091,7 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sglang_router"
|
name = "sglang_router_rs"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
|
|||||||
@@ -1,14 +1,14 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "sglang_router"
|
name = "sglang_router_rs"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "sglang_router"
|
name = "sglang_router_rs"
|
||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
name = "sglang_router"
|
name = "sglang_router_rs"
|
||||||
# Pure Rust library: Just omit crate-type (defaults to rlib)
|
# Pure Rust library: Just omit crate-type (defaults to rlib)
|
||||||
# Python/C binding + Rust library: Use ["cdylib", "rlib"]
|
# Python/C binding + Rust library: Use ["cdylib", "rlib"]
|
||||||
crate-type = ["cdylib", "rlib"]
|
crate-type = ["cdylib", "rlib"]
|
||||||
|
|||||||
3
rust/MANIFEST.in
Normal file
3
rust/MANIFEST.in
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Must include:
|
||||||
|
include Cargo.toml # Rust project configuration
|
||||||
|
recursive-include src *.rs # Rust source files
|
||||||
71
rust/README.md
Normal file
71
rust/README.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# SGLang Router (Experimental)
|
||||||
|
|
||||||
|
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Rust and Cargo installed
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install rustup (Rust installer and version manager)
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||||
|
|
||||||
|
# Follow the installation prompts, then reload your shell
|
||||||
|
source $HOME/.cargo/env
|
||||||
|
|
||||||
|
# Verify installation
|
||||||
|
rustc --version
|
||||||
|
cargo --version
|
||||||
|
```
|
||||||
|
|
||||||
|
- Python with pip installed
|
||||||
|
|
||||||
|
## Build Process
|
||||||
|
|
||||||
|
### 1. Build Rust Project
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo build
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Build Python Binding
|
||||||
|
|
||||||
|
#### Option A: Build and Install Wheel
|
||||||
|
1. Build the wheel package:
|
||||||
|
```bash
|
||||||
|
pip install setuptools-rust wheel build
|
||||||
|
python -m build
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install the generated wheel:
|
||||||
|
```bash
|
||||||
|
pip install <path-to-wheel>
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Option B: Development Mode
|
||||||
|
|
||||||
|
For development purposes, you can install the package in editable mode:
|
||||||
|
```bash
|
||||||
|
pip install -e .
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** When modifying Rust code, you must rebuild the wheel for changes to take effect.
|
||||||
|
|
||||||
|
## CI/CD Setup
|
||||||
|
|
||||||
|
The continuous integration pipeline consists of three main steps:
|
||||||
|
|
||||||
|
### 1. Build Wheels
|
||||||
|
- Uses `cibuildwheel` to create manylinux x86_64 packages
|
||||||
|
- Compatible with major Linux distributions (Ubuntu, CentOS, etc.)
|
||||||
|
- Additional configurations can be added to support other OS/architectures
|
||||||
|
- Reference: [cibuildwheel documentation](https://cibuildwheel.pypa.io/en/stable/)
|
||||||
|
|
||||||
|
### 2. Build Source Distribution
|
||||||
|
- Creates a source distribution containing the raw, unbuilt code
|
||||||
|
- Enables `pip` to build the package from source when prebuilt wheels are unavailable
|
||||||
|
|
||||||
|
### 3. Publish to PyPI
|
||||||
|
- Uploads both wheels and source distribution to PyPI
|
||||||
|
|
||||||
|
The CI configuration is based on the [tiktoken workflow](https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1).
|
||||||
5
rust/py_src/__init__.py
Normal file
5
rust/py_src/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# a lightweight wrapper on router with argument type and comments
|
||||||
|
# no wrapper on policy type => direct export
|
||||||
|
from sglang_router_rs import PolicyType
|
||||||
|
|
||||||
|
from .router import Router
|
||||||
48
rust/py_src/router.py
Normal file
48
rust/py_src/router.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from sglang_router_rs import PolicyType
|
||||||
|
from sglang_router_rs import Router as _Router
|
||||||
|
|
||||||
|
|
||||||
|
class Router:
|
||||||
|
"""
|
||||||
|
A high-performance router for distributing requests across worker nodes.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_urls: List of URLs for worker nodes that will handle requests
|
||||||
|
policy: Load balancing policy to use. Options:
|
||||||
|
- PolicyType.Random: Randomly select workers
|
||||||
|
- PolicyType.RoundRobin: Distribute requests in round-robin fashion
|
||||||
|
- PolicyType.ApproxTree: Tree-based routing using tokenizer similarity
|
||||||
|
host: Host address to bind the router server
|
||||||
|
port: Port number to bind the router server
|
||||||
|
tokenizer_path: Path to tokenizer model file (required for ApproxTree policy)
|
||||||
|
cache_threshold: Caching threshold value between 0-1
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
worker_urls: List[str],
|
||||||
|
policy: PolicyType = PolicyType.RoundRobin,
|
||||||
|
host: str = "127.0.0.1",
|
||||||
|
port: int = 3001,
|
||||||
|
tokenizer_path: Optional[str] = None,
|
||||||
|
cache_threshold: float = 0.50,
|
||||||
|
):
|
||||||
|
|
||||||
|
self._router = _Router(
|
||||||
|
worker_urls=worker_urls,
|
||||||
|
policy=policy,
|
||||||
|
host=host,
|
||||||
|
port=port,
|
||||||
|
tokenizer_path=tokenizer_path,
|
||||||
|
cache_threshold=cache_threshold,
|
||||||
|
)
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
"""Start the router server.
|
||||||
|
|
||||||
|
This method blocks until the server is shut down.
|
||||||
|
"""
|
||||||
|
self._router.start()
|
||||||
@@ -1,15 +1,25 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["maturin>=1.5.1,<2.0"]
|
requires = ["setuptools>=45", "wheel", "setuptools-rust>=1.5.2"]
|
||||||
build-backend = "maturin"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "sglang_router"
|
name = "sglang-router"
|
||||||
requires-python = ">=3.9"
|
version = "0.0.2"
|
||||||
|
description = "SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances."
|
||||||
|
authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}]
|
||||||
|
requires-python = ">=3.8"
|
||||||
|
readme = "README.md"
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Programming Language :: Rust",
|
|
||||||
"Programming Language :: Python :: Implementation :: CPython",
|
"Programming Language :: Python :: Implementation :: CPython",
|
||||||
|
"Programming Language :: Rust",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
]
|
]
|
||||||
dynamic = ["version"]
|
|
||||||
|
|
||||||
[tool.maturin]
|
# https://github.com/PyO3/setuptools-rust?tab=readme-ov-file
|
||||||
bindings = 'pyo3'
|
[tool.setuptools.packages]
|
||||||
|
find = { where = ["py_src"] }
|
||||||
|
|
||||||
|
[[tool.setuptools-rust.ext-modules]]
|
||||||
|
target = "sglang_router_rs"
|
||||||
|
path = "Cargo.toml"
|
||||||
|
binding = "PyO3"
|
||||||
|
|||||||
130
rust/readme.md
130
rust/readme.md
@@ -1,130 +0,0 @@
|
|||||||
# SGLang Router (Experimental)
|
|
||||||
|
|
||||||
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
1. `src/`: rust impl of the router
|
|
||||||
2. `py_src/`: lightweight python interface on top of rust python binding. This will be published as `sglang-router` pypi package
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
WIP. Ideally just
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install sglang-router
|
|
||||||
```
|
|
||||||
|
|
||||||
## Development
|
|
||||||
|
|
||||||
### Rust
|
|
||||||
|
|
||||||
1. Install Rust
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install rustup (Rust installer and version manager)
|
|
||||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
||||||
|
|
||||||
# Follow the installation prompts, then reload your shell
|
|
||||||
source $HOME/.cargo/env
|
|
||||||
|
|
||||||
# Verify installation
|
|
||||||
rustc --version
|
|
||||||
cargo --version
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Build the router
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Navigate to the rust directory
|
|
||||||
cd ./rust
|
|
||||||
|
|
||||||
# Build the project
|
|
||||||
cargo build
|
|
||||||
|
|
||||||
# Verify the binary works correctly
|
|
||||||
./target/debug/router --help
|
|
||||||
```
|
|
||||||
|
|
||||||
The help command will show available options:
|
|
||||||
```
|
|
||||||
Usage: router [OPTIONS]
|
|
||||||
|
|
||||||
Options:
|
|
||||||
--host <HOST> [default: 127.0.0.1]
|
|
||||||
--port <PORT> [default: 3001]
|
|
||||||
--worker-urls <WORKER_URLS>
|
|
||||||
--policy <POLICY> [default: round_robin] [possible values: round_robin, random]
|
|
||||||
-h, --help Print help
|
|
||||||
-V, --version Print version
|
|
||||||
```
|
|
||||||
|
|
||||||
### Python Binding
|
|
||||||
|
|
||||||
1. Create a virtual environment
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ python -m venv .venv
|
|
||||||
$ source .venv/bin/activate
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Install python dependencies
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ pip install maturin
|
|
||||||
$ pip install patchelf
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Install rust python binding
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ maturin develop
|
|
||||||
🔗 Found pyo3 bindings
|
|
||||||
🐍 Found CPython 3.10 at /home/jobuser/resources/sglang/rust/.venv/bin/python
|
|
||||||
📡 Using build options bindings from pyproject.toml
|
|
||||||
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.11s
|
|
||||||
📦 Built wheel for CPython 3.10 to /tmp/.tmpJb65sc/sglang_router-0.0.0-cp310-cp310-linux_x86_64.whl
|
|
||||||
✏️ Setting installed package as editable
|
|
||||||
🛠 Installed sglang_router-0.0.0
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Alternatively, if you don't want to create a venv, you can also build the binding as a wheel and install it
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ maturin build --interpreter python
|
|
||||||
...
|
|
||||||
Compiling pyo3 v0.22.6
|
|
||||||
Compiling pyo3-macros v0.22.6
|
|
||||||
Compiling sglang_router v0.0.0 (/home/jobuser/sglang/rust)
|
|
||||||
Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.67s
|
|
||||||
🖨 Copied external shared libraries to package sglang_router.libs directory:
|
|
||||||
/usr/lib/libssl.so.1.1.1k
|
|
||||||
/usr/lib/libcrypto.so.1.1.1k
|
|
||||||
📦 Built wheel for CPython 3.10 to <wheel path>
|
|
||||||
|
|
||||||
$ pip install <wheel path>
|
|
||||||
```
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
1. Launch worker instances
|
|
||||||
```bash
|
|
||||||
# Launch first worker on GPU 0
|
|
||||||
export CUDA_VISIBLE_DEVICES=0
|
|
||||||
python -m sglang.launch_server \
|
|
||||||
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
|
|
||||||
--host 127.0.0.1 \
|
|
||||||
--port 30000
|
|
||||||
|
|
||||||
# Launch second worker on GPU 1
|
|
||||||
export CUDA_VISIBLE_DEVICES=1
|
|
||||||
python -m sglang.launch_server \
|
|
||||||
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
|
|
||||||
--host 127.0.0.1 \
|
|
||||||
--port 30002
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Launch router and connect to workers
|
|
||||||
```bash
|
|
||||||
./target/debug/router --worker-urls http://127.0.0.1:30000,http://127.0.0.1:30002
|
|
||||||
```
|
|
||||||
@@ -90,7 +90,7 @@ impl Router {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[pymodule]
|
#[pymodule]
|
||||||
fn sglang_router(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
fn sglang_router_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||||
m.add_class::<PolicyType>()?;
|
m.add_class::<PolicyType>()?;
|
||||||
m.add_class::<Router>()?;
|
m.add_class::<Router>()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -1,18 +0,0 @@
|
|||||||
import sglang_router as router
|
|
||||||
|
|
||||||
# Create a Router instance with:
|
|
||||||
# - host: the address to bind to (e.g., "127.0.0.1")
|
|
||||||
# - port: the port number (e.g., 3001)
|
|
||||||
# - worker_urls: list of worker URLs to distribute requests to
|
|
||||||
router = router.Router(
|
|
||||||
host="127.0.0.1",
|
|
||||||
port=3001,
|
|
||||||
worker_urls=[
|
|
||||||
"http://localhost:30000",
|
|
||||||
"http://localhost:30002",
|
|
||||||
],
|
|
||||||
policy="random",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Start the router - this will block and run the server
|
|
||||||
router.start()
|
|
||||||
Reference in New Issue
Block a user