setup router python binding ci (#1999)
This commit is contained in:
104
.github/workflows/release-pypi-router.yml
vendored
Normal file
104
.github/workflows/release-pypi-router.yml
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
# Reference: https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1
|
||||
|
||||
name: Release SGLang Router to PyPI
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build on ${{ matrix.os }} (${{ matrix.target }})
|
||||
runs-on: ${{ matrix.os }}-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu
|
||||
target: x86_64
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
path: sglang-repo
|
||||
|
||||
- name: Move rust folder to root and delete sglang-repo
|
||||
run: |
|
||||
mv sglang-repo/rust/* .
|
||||
rm -rf sglang-repo
|
||||
ls -alt
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
python -m pip install -U pip
|
||||
python -m pip install build twine auditwheel
|
||||
|
||||
- name: Build package
|
||||
uses: pypa/cibuildwheel@v2.21.3
|
||||
env:
|
||||
CIBW_BUILD: "cp38-manylinux_x86_64 cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64"
|
||||
CIBW_BEFORE_ALL: |
|
||||
yum update && yum install -y openssl-devel && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
CIBW_ENVIRONMENT: "PATH=$HOME/.cargo/bin:$PATH"
|
||||
|
||||
- name: List built packages
|
||||
run: ls -lh wheelhouse/
|
||||
|
||||
- name: Check packages
|
||||
run: twine check --strict wheelhouse/*
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: packages-${{ matrix.os }}-${{ matrix.target }}
|
||||
path: wheelhouse/
|
||||
|
||||
build-sdist:
|
||||
name: Build SDist
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
path: sglang-repo
|
||||
|
||||
- name: Move rust folder to root and delete sglang-repo
|
||||
run: |
|
||||
mv sglang-repo/rust/* .
|
||||
rm -rf sglang-repo
|
||||
ls -alt
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Build SDist
|
||||
run: |
|
||||
pip install build
|
||||
python -m build --sdist
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: sdist
|
||||
path: dist/*.tar.gz
|
||||
|
||||
upload:
|
||||
name: Upload to PyPI
|
||||
needs: [build, build-sdist]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: dist
|
||||
merge-multiple: true
|
||||
|
||||
- name: Upload to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
||||
run: |
|
||||
pip install twine
|
||||
twine upload dist/* --verbose
|
||||
2
rust/Cargo.lock
generated
2
rust/Cargo.lock
generated
@@ -2091,7 +2091,7 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sglang_router"
|
||||
name = "sglang_router_rs"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
[package]
|
||||
name = "sglang_router"
|
||||
name = "sglang_router_rs"
|
||||
version = "0.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "sglang_router"
|
||||
name = "sglang_router_rs"
|
||||
path = "src/main.rs"
|
||||
|
||||
[lib]
|
||||
name = "sglang_router"
|
||||
name = "sglang_router_rs"
|
||||
# Pure Rust library: Just omit crate-type (defaults to rlib)
|
||||
# Python/C binding + Rust library: Use ["cdylib", "rlib"]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
3
rust/MANIFEST.in
Normal file
3
rust/MANIFEST.in
Normal file
@@ -0,0 +1,3 @@
|
||||
# Must include:
|
||||
include Cargo.toml # Rust project configuration
|
||||
recursive-include src *.rs # Rust source files
|
||||
71
rust/README.md
Normal file
71
rust/README.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# SGLang Router (Experimental)
|
||||
|
||||
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Rust and Cargo installed
|
||||
|
||||
```bash
|
||||
# Install rustup (Rust installer and version manager)
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
|
||||
# Follow the installation prompts, then reload your shell
|
||||
source $HOME/.cargo/env
|
||||
|
||||
# Verify installation
|
||||
rustc --version
|
||||
cargo --version
|
||||
```
|
||||
|
||||
- Python with pip installed
|
||||
|
||||
## Build Process
|
||||
|
||||
### 1. Build Rust Project
|
||||
|
||||
```bash
|
||||
cargo build
|
||||
```
|
||||
|
||||
### 2. Build Python Binding
|
||||
|
||||
#### Option A: Build and Install Wheel
|
||||
1. Build the wheel package:
|
||||
```bash
|
||||
pip install setuptools-rust wheel build
|
||||
python -m build
|
||||
```
|
||||
|
||||
2. Install the generated wheel:
|
||||
```bash
|
||||
pip install <path-to-wheel>
|
||||
```
|
||||
|
||||
#### Option B: Development Mode
|
||||
|
||||
For development purposes, you can install the package in editable mode:
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
**Note:** When modifying Rust code, you must rebuild the wheel for changes to take effect.
|
||||
|
||||
## CI/CD Setup
|
||||
|
||||
The continuous integration pipeline consists of three main steps:
|
||||
|
||||
### 1. Build Wheels
|
||||
- Uses `cibuildwheel` to create manylinux x86_64 packages
|
||||
- Compatible with major Linux distributions (Ubuntu, CentOS, etc.)
|
||||
- Additional configurations can be added to support other OS/architectures
|
||||
- Reference: [cibuildwheel documentation](https://cibuildwheel.pypa.io/en/stable/)
|
||||
|
||||
### 2. Build Source Distribution
|
||||
- Creates a source distribution containing the raw, unbuilt code
|
||||
- Enables `pip` to build the package from source when prebuilt wheels are unavailable
|
||||
|
||||
### 3. Publish to PyPI
|
||||
- Uploads both wheels and source distribution to PyPI
|
||||
|
||||
The CI configuration is based on the [tiktoken workflow](https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1).
|
||||
5
rust/py_src/__init__.py
Normal file
5
rust/py_src/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# a lightweight wrapper on router with argument types and comments
|
||||
# no wrapper on policy type => direct export
|
||||
from sglang_router_rs import PolicyType
|
||||
|
||||
from .router import Router
|
||||
48
rust/py_src/router.py
Normal file
48
rust/py_src/router.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from typing import List, Optional
|
||||
|
||||
from sglang_router_rs import PolicyType
|
||||
from sglang_router_rs import Router as _Router
|
||||
|
||||
|
||||
class Router:
|
||||
"""
|
||||
A high-performance router for distributing requests across worker nodes.
|
||||
|
||||
Args:
|
||||
worker_urls: List of URLs for worker nodes that will handle requests
|
||||
policy: Load balancing policy to use. Options:
|
||||
- PolicyType.Random: Randomly select workers
|
||||
- PolicyType.RoundRobin: Distribute requests in round-robin fashion
|
||||
- PolicyType.ApproxTree: Tree-based routing using tokenizer similarity
|
||||
host: Host address to bind the router server
|
||||
port: Port number to bind the router server
|
||||
tokenizer_path: Path to tokenizer model file (required for ApproxTree policy)
|
||||
cache_threshold: Caching threshold value between 0-1
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
worker_urls: List[str],
|
||||
policy: PolicyType = PolicyType.RoundRobin,
|
||||
host: str = "127.0.0.1",
|
||||
port: int = 3001,
|
||||
tokenizer_path: Optional[str] = None,
|
||||
cache_threshold: float = 0.50,
|
||||
):
|
||||
|
||||
self._router = _Router(
|
||||
worker_urls=worker_urls,
|
||||
policy=policy,
|
||||
host=host,
|
||||
port=port,
|
||||
tokenizer_path=tokenizer_path,
|
||||
cache_threshold=cache_threshold,
|
||||
)
|
||||
|
||||
def start(self) -> None:
|
||||
"""Start the router server.
|
||||
|
||||
This method blocks until the server is shut down.
|
||||
"""
|
||||
self._router.start()
|
||||
@@ -1,15 +1,25 @@
|
||||
[build-system]
|
||||
requires = ["maturin>=1.5.1,<2.0"]
|
||||
build-backend = "maturin"
|
||||
requires = ["setuptools>=45", "wheel", "setuptools-rust>=1.5.2"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "sglang_router"
|
||||
requires-python = ">=3.9"
|
||||
name = "sglang-router"
|
||||
version = "0.0.2"
|
||||
description = "SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances."
|
||||
authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}]
|
||||
requires-python = ">=3.8"
|
||||
readme = "README.md"
|
||||
classifiers = [
|
||||
"Programming Language :: Rust",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Rust",
|
||||
"Programming Language :: Python :: 3",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
[tool.maturin]
|
||||
bindings = 'pyo3'
|
||||
# https://github.com/PyO3/setuptools-rust?tab=readme-ov-file
|
||||
[tool.setuptools.packages]
|
||||
find = { where = ["py_src"] }
|
||||
|
||||
[[tool.setuptools-rust.ext-modules]]
|
||||
target = "sglang_router_rs"
|
||||
path = "Cargo.toml"
|
||||
binding = "PyO3"
|
||||
|
||||
130
rust/readme.md
130
rust/readme.md
@@ -1,130 +0,0 @@
|
||||
# SGLang Router (Experimental)
|
||||
|
||||
SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
|
||||
|
||||
## Architecture
|
||||
|
||||
1. `src/`: rust impl of the router
|
||||
2. `py_src/`: lightweight python interface on top of rust python binding. This will be published as `sglang-router` pypi package
|
||||
|
||||
## Installation
|
||||
|
||||
WIP. Ideally just
|
||||
|
||||
```bash
|
||||
pip install sglang-router
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
### Rust
|
||||
|
||||
1. Install Rust
|
||||
|
||||
```bash
|
||||
# Install rustup (Rust installer and version manager)
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
|
||||
# Follow the installation prompts, then reload your shell
|
||||
source $HOME/.cargo/env
|
||||
|
||||
# Verify installation
|
||||
rustc --version
|
||||
cargo --version
|
||||
```
|
||||
|
||||
2. Build the router
|
||||
|
||||
```bash
|
||||
# Navigate to the rust directory
|
||||
cd ./rust
|
||||
|
||||
# Build the project
|
||||
cargo build
|
||||
|
||||
# Verify the binary works correctly
|
||||
./target/debug/router --help
|
||||
```
|
||||
|
||||
The help command will show available options:
|
||||
```
|
||||
Usage: router [OPTIONS]
|
||||
|
||||
Options:
|
||||
--host <HOST> [default: 127.0.0.1]
|
||||
--port <PORT> [default: 3001]
|
||||
--worker-urls <WORKER_URLS>
|
||||
--policy <POLICY> [default: round_robin] [possible values: round_robin, random]
|
||||
-h, --help Print help
|
||||
-V, --version Print version
|
||||
```
|
||||
|
||||
### Python Binding
|
||||
|
||||
1. Create a virtual environment
|
||||
|
||||
```bash
|
||||
$ python -m venv .venv
|
||||
$ source .venv/bin/activate
|
||||
```
|
||||
|
||||
2. Install python dependencies
|
||||
|
||||
```bash
|
||||
$ pip install maturin
|
||||
$ pip install patchelf
|
||||
```
|
||||
|
||||
3. Install rust python binding
|
||||
|
||||
```bash
|
||||
$ maturin develop
|
||||
🔗 Found pyo3 bindings
|
||||
🐍 Found CPython 3.10 at /home/jobuser/resources/sglang/rust/.venv/bin/python
|
||||
📡 Using build options bindings from pyproject.toml
|
||||
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.11s
|
||||
📦 Built wheel for CPython 3.10 to /tmp/.tmpJb65sc/sglang_router-0.0.0-cp310-cp310-linux_x86_64.whl
|
||||
✏️ Setting installed package as editable
|
||||
🛠 Installed sglang_router-0.0.0
|
||||
```
|
||||
|
||||
4. Alternatively, if you don't want to create a venv, you can also build the binding as a wheel and install it
|
||||
|
||||
```bash
|
||||
$ maturin build --interpreter python
|
||||
...
|
||||
Compiling pyo3 v0.22.6
|
||||
Compiling pyo3-macros v0.22.6
|
||||
Compiling sglang_router v0.0.0 (/home/jobuser/sglang/rust)
|
||||
Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.67s
|
||||
🖨 Copied external shared libraries to package sglang_router.libs directory:
|
||||
/usr/lib/libssl.so.1.1.1k
|
||||
/usr/lib/libcrypto.so.1.1.1k
|
||||
📦 Built wheel for CPython 3.10 to <wheel path>
|
||||
|
||||
$ pip install <wheel path>
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Launch worker instances
|
||||
```bash
|
||||
# Launch first worker on GPU 0
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||
--host 127.0.0.1 \
|
||||
--port 30000
|
||||
|
||||
# Launch second worker on GPU 1
|
||||
export CUDA_VISIBLE_DEVICES=1
|
||||
python -m sglang.launch_server \
|
||||
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||
--host 127.0.0.1 \
|
||||
--port 30002
|
||||
```
|
||||
|
||||
2. Launch router and connect to workers
|
||||
```bash
|
||||
./target/debug/router --worker-urls http://127.0.0.1:30000,http://127.0.0.1:30002
|
||||
```
|
||||
@@ -90,7 +90,7 @@ impl Router {
|
||||
}
|
||||
|
||||
#[pymodule]
|
||||
fn sglang_router(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
fn sglang_router_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<PolicyType>()?;
|
||||
m.add_class::<Router>()?;
|
||||
Ok(())
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
import sglang_router as router
|
||||
|
||||
# Create a Router instance with:
|
||||
# - host: the address to bind to (e.g., "127.0.0.1")
|
||||
# - port: the port number (e.g., 3001)
|
||||
# - worker_urls: list of worker URLs to distribute requests to
|
||||
router = router.Router(
|
||||
host="127.0.0.1",
|
||||
port=3001,
|
||||
worker_urls=[
|
||||
"http://localhost:30000",
|
||||
"http://localhost:30002",
|
||||
],
|
||||
policy="random",
|
||||
)
|
||||
|
||||
# Start the router - this will block and run the server
|
||||
router.start()
|
||||
Reference in New Issue
Block a user