diff --git a/.github/workflows/release-pypi-router.yml b/.github/workflows/release-pypi-router.yml
new file mode 100644
index 000000000..f48bca4e7
--- /dev/null
+++ b/.github/workflows/release-pypi-router.yml
@@ -0,0 +1,104 @@
+# Reference: https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1
+
+name: Release SGLang Router to PyPI
+
+on:
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build on ${{ matrix.os }} (${{ matrix.target }})
+    runs-on: ${{ matrix.os }}-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu
+            target: x86_64
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          path: sglang-repo
+
+      - name: Move rust folder to root and delete sglang-repo
+        run: |
+          mv sglang-repo/rust/* .
+          rm -rf sglang-repo
+          ls -alt
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install build dependencies
+        run: |
+          python -m pip install -U pip
+          python -m pip install build twine auditwheel
+
+      - name: Build package
+        uses: pypa/cibuildwheel@v2.21.3
+        env:
+          CIBW_BUILD: "cp38-manylinux_x86_64 cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64"
+          CIBW_BEFORE_ALL: |  # every yum call needs -y: this runs non-interactively, so a confirmation prompt aborts the chain
+            yum update -y && yum install -y openssl-devel && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+          CIBW_ENVIRONMENT: "PATH=$HOME/.cargo/bin:$PATH"
+
+      - name: List built packages
+        run: ls -lh wheelhouse/
+
+      - name: Check packages
+        run: twine check --strict wheelhouse/*
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: packages-${{ matrix.os }}-${{ matrix.target }}
+          path: wheelhouse/
+
+  build-sdist:
+    name: Build SDist
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          path: sglang-repo
+
+      - name: Move rust folder to root and delete sglang-repo
+        run: |
+          mv sglang-repo/rust/* .
+          rm -rf sglang-repo
+          ls -alt
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Build SDist
+        run: |
+          pip install build
+          python -m build --sdist
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: sdist
+          path: dist/*.tar.gz
+
+  upload:
+    name: Upload to PyPI
+    needs: [build, build-sdist]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: dist
+          merge-multiple: true
+
+      - name: Upload to PyPI
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        run: |
+          pip install twine
+          twine upload dist/* --verbose
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index a7f0dce33..69b666df1 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -2091,7 +2091,7 @@ dependencies = [
 ]
 
 [[package]]
-name = "sglang_router"
+name = "sglang_router_rs"
 version = "0.0.0"
 dependencies = [
  "actix-web",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index d02eba639..a7f5d9ddd 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -1,14 +1,14 @@
 [package]
-name = "sglang_router"
+name = "sglang_router_rs"
 version = "0.0.0"
 edition = "2021"
 
 [[bin]]
-name = "sglang_router"
+name = "sglang_router_rs"
 path = "src/main.rs"
 
 [lib]
-name = "sglang_router"
+name = "sglang_router_rs"
 # Pure Rust library: Just omit crate-type (defaults to rlib)
 # Python/C binding + Rust library: Use ["cdylib", "rlib"]
 crate-type = ["cdylib", "rlib"]
diff --git a/rust/MANIFEST.in b/rust/MANIFEST.in
new file mode 100644
index 000000000..e1d6e7a90
--- /dev/null
+++ b/rust/MANIFEST.in
@@ -0,0 +1,3 @@
+# Must include:
+include Cargo.toml  # Rust project configuration
+recursive-include src *.rs  # Rust source files
diff --git a/rust/README.md b/rust/README.md
new file mode 100644
index 000000000..0b5d7e2d3
--- /dev/null
+++ b/rust/README.md
@@ -0,0 +1,71 @@
+# SGLang Router (Experimental)
+
+SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
+
+## Prerequisites
+
+- Rust and Cargo installed
+
+```bash
+# Install rustup (Rust installer and version manager)
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+
+# Follow the installation prompts, then reload your shell
+source $HOME/.cargo/env
+
+# Verify installation
+rustc --version
+cargo --version
+```
+
+- Python with pip installed
+
+## Build Process
+
+### 1. Build Rust Project
+
+```bash
+cargo build
+```
+
+### 2. Build Python Binding
+
+#### Option A: Build and Install Wheel
+1. Build the wheel package:
+```bash
+pip install setuptools-rust wheel build
+python -m build
+```
+
+2. Install the generated wheel:
+```bash
+pip install dist/*.whl
+```
+
+#### Option B: Development Mode
+
+For development purposes, you can install the package in editable mode:
+```bash
+pip install -e .
+```
+
+**Note:** When modifying Rust code, you must rebuild the wheel for changes to take effect.
+
+## CI/CD Setup
+
+The continuous integration pipeline consists of three main steps:
+
+### 1. Build Wheels
+- Uses `cibuildwheel` to create manylinux x86_64 packages
+- Compatible with major Linux distributions (Ubuntu, CentOS, etc.)
+- Additional configurations can be added to support other OS/architectures
+- Reference: [cibuildwheel documentation](https://cibuildwheel.pypa.io/en/stable/)
+
+### 2. Build Source Distribution
+- Creates a source distribution containing the raw, unbuilt code
+- Enables `pip` to build the package from source when prebuilt wheels are unavailable
+
+### 3. Publish to PyPI
+- Uploads both wheels and source distribution to PyPI
+
+The CI configuration is based on the [tiktoken workflow](https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1).
diff --git a/rust/py_src/main.py b/rust/demo.py
similarity index 100%
rename from rust/py_src/main.py
rename to rust/demo.py
diff --git a/rust/py_src/dp_demo.py b/rust/dp_demo.py
similarity index 100%
rename from rust/py_src/dp_demo.py
rename to rust/dp_demo.py
diff --git a/rust/py_src/__init__.py b/rust/py_src/__init__.py
new file mode 100644
index 000000000..1bb51e264
--- /dev/null
+++ b/rust/py_src/__init__.py
@@ -0,0 +1,5 @@
+# A lightweight wrapper around the Rust router that adds argument types and docstrings.
+# PolicyType needs no wrapper, so it is re-exported directly from the Rust extension.
+from sglang_router_rs import PolicyType
+
+from .router import Router
diff --git a/rust/py_src/router.py b/rust/py_src/router.py
new file mode 100644
index 000000000..200027937
--- /dev/null
+++ b/rust/py_src/router.py
@@ -0,0 +1,48 @@
+from typing import List, Optional
+
+from sglang_router_rs import PolicyType
+from sglang_router_rs import Router as _Router
+
+
+class Router:
+    """
+    A high-performance router for distributing requests across worker nodes.
+
+    Args:
+        worker_urls: List of URLs for worker nodes that will handle requests
+        policy: Load balancing policy to use. Options:
+            - PolicyType.Random: Randomly select workers
+            - PolicyType.RoundRobin: Distribute requests in round-robin fashion
+            - PolicyType.ApproxTree: Tree-based routing using tokenizer similarity
+        host: Host address to bind the router server
+        port: Port number to bind the router server
+        tokenizer_path: Path to tokenizer model file (required for ApproxTree policy)
+        cache_threshold: Caching threshold value between 0-1
+
+    """
+
+    def __init__(
+        self,
+        worker_urls: List[str],
+        policy: PolicyType = PolicyType.RoundRobin,
+        host: str = "127.0.0.1",
+        port: int = 3001,
+        tokenizer_path: Optional[str] = None,
+        cache_threshold: float = 0.50,
+    ):
+
+        self._router = _Router(
+            worker_urls=worker_urls,
+            policy=policy,
+            host=host,
+            port=port,
+            tokenizer_path=tokenizer_path,
+            cache_threshold=cache_threshold,
+        )
+
+    def start(self) -> None:
+        """Start the router server.
+
+        This method blocks until the server is shut down.
+        """
+        self._router.start()
diff --git a/rust/pyproject.toml b/rust/pyproject.toml
index 6eb82967f..24108f35c 100644
--- a/rust/pyproject.toml
+++ b/rust/pyproject.toml
@@ -1,15 +1,25 @@
 [build-system]
-requires = ["maturin>=1.5.1,<2.0"]
-build-backend = "maturin"
+requires = ["setuptools>=45", "wheel", "setuptools-rust>=1.5.2"]
+build-backend = "setuptools.build_meta"
 
 [project]
-name = "sglang_router"
-requires-python = ">=3.9"
+name = "sglang-router"
+version = "0.0.2"
+description = "SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances."
+authors = [{name = "Byron Hsu", email = "byronhsu1230@gmail.com"}]
+requires-python = ">=3.8"
+readme = "README.md"
 classifiers = [
-    "Programming Language :: Rust",
     "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Rust",
+    "Programming Language :: Python :: 3",
 ]
-dynamic = ["version"]
 
-[tool.maturin]
-bindings = 'pyo3'
+# https://github.com/PyO3/setuptools-rust?tab=readme-ov-file
+[tool.setuptools.packages]
+find = { where = ["py_src"] }
+
+[[tool.setuptools-rust.ext-modules]]
+target = "sglang_router_rs"
+path = "Cargo.toml"
+binding = "PyO3"
diff --git a/rust/readme.md b/rust/readme.md
deleted file mode 100644
index 7ef672e0e..000000000
--- a/rust/readme.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# SGLang Router (Experimental)
-
-SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances.
-
-## Architecture
-
-1. `src/`: rust impl of the router
-2. `py_src/`: lightweight python interafce on top of rust python binding. This will be published as `sglang-router` pypi package
-
-## Installation
-
-WIP. Ideally just
-
-```bash
-pip install sglang-router
-```
-
-## Development
-
-### Rust
-
-1. Install Rust
-
-```bash
-# Install rustup (Rust installer and version manager)
-curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-
-# Follow the installation prompts, then reload your shell
-source $HOME/.cargo/env
-
-# Verify installation
-rustc --version
-cargo --version
-```
-
-2. Build the router
-
-```bash
-# Navigate to the rust directory
-cd ./rust
-
-# Build the project
-cargo build
-
-# Verify the binary works correctly
-./target/debug/router --help
-```
-
-The help command will show available options:
-```
-Usage: router [OPTIONS]
-
-Options:
-      --host  [default: 127.0.0.1]
-      --port  [default: 3001]
-      --worker-urls
-      --policy  [default: round_robin] [possible values: round_robin, random]
-  -h, --help  Print help
-  -V, --version  Print version
-```
-
-### Python Binding
-
-1. Create a virtual environment
-
-```bash
-$ python -m venv .venv
-$ source .venv/bin/activate
-```
-
-2. Install python dependencies
-
-```bash
-$ pip install maturin
-$ pip install patchelf
-```
-
-3. Install rust python binding
-
-```bash
-$ maturin develop
-🔗 Found pyo3 bindings
-🐍 Found CPython 3.10 at /home/jobuser/resources/sglang/rust/.venv/bin/python
-📡 Using build options bindings from pyproject.toml
-    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.11s
-📦 Built wheel for CPython 3.10 to /tmp/.tmpJb65sc/sglang_router-0.0.0-cp310-cp310-linux_x86_64.whl
-✏️ Setting installed package as editable
-🛠 Installed sglang_router-0.0.0
-```
-
-4. Alternatively, if you don't want to create a venv, you can also build the binding as a wheel and install it
-
-```bash
-$ maturin build --interpreter python
-...
-   Compiling pyo3 v0.22.6
-   Compiling pyo3-macros v0.22.6
-   Compiling sglang_router v0.0.0 (/home/jobuser/sglang/rust)
-    Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.67s
-🖨 Copied external shared libraries to package sglang_router.libs directory:
-    /usr/lib/libssl.so.1.1.1k
-    /usr/lib/libcrypto.so.1.1.1k
-📦 Built wheel for CPython 3.10 to
-
-$ pip install
-```
-
-## Usage
-
-1. Launch worker instances
-```bash
-# Launch first worker on GPU 0
-export CUDA_VISIBLE_DEVICES=0
-python -m sglang.launch_server \
-    --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
-    --host 127.0.0.1 \
-    --port 30000
-
-# Launch second worker on GPU 1
-export CUDA_VISIBLE_DEVICES=1
-python -m sglang.launch_server \
-    --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
-    --host 127.0.0.1 \
-    --port 30002
-```
-
-2. Launch router and connect to workers
-```bash
-./target/debug/router --worker-urls http://127.0.0.1:30000,http://127.0.0.1:30002
-```
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index cc99cb15a..9f3fb6fc2 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -90,7 +90,7 @@ impl Router {
 }
 
 #[pymodule]
-fn sglang_router(m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn sglang_router_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PolicyType>()?;
     m.add_class::<Router>()?;
     Ok(())
diff --git a/rust/test_bindings.py b/rust/test_bindings.py
deleted file mode 100644
index c4ecfe3c6..000000000
--- a/rust/test_bindings.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import sglang_router as router
-
-# Create a Router instance with:
-# - host: the address to bind to (e.g., "127.0.0.1")
-# - port: the port number (e.g., 3001)
-# - worker_urls: list of worker URLs to distribute requests to
-router = router.Router(
-    host="127.0.0.1",
-    port=3001,
-    worker_urls=[
-        "http://localhost:30000",
-        "http://localhost:30002",
-    ],
-    policy="random",
-)
-
-# Start the router - this will block and run the server
-router.start()