From 61bb223e0fc1ccd0c26ac3137f0d9154bcecc25a Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 25 Aug 2024 17:31:52 -0700 Subject: [PATCH] Update CI runner docs (#1213) --- .github/workflows/moe-test.yml | 4 +- docs/en/setup_github_runner.md | 109 ++++++++++----------------------- 2 files changed, 34 insertions(+), 79 deletions(-) diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index dd5665a3f..2caa3d182 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -33,13 +33,13 @@ jobs: pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - name: Benchmark MoE Serving Throughput - timeout_minutes: 10 + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - name: Benchmark MoE Serving Throughput (w/o RadixAttention) - timeout_minutes: 10 + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/docs/en/setup_github_runner.md b/docs/en/setup_github_runner.md index 97a7f2626..282e12b73 100644 --- a/docs/en/setup_github_runner.md +++ b/docs/en/setup_github_runner.md @@ -1,89 +1,44 @@ -# Set up self hosted runner for GitHub Action +# Set Up Self-hosted Runners for GitHub Action -## Config Runner +## Add a Runner -```bash -# https://github.com/sgl-project/sglang/settings/actions/runners/new?arch=x64&os=linux -# Involves some TOKEN and other private information, click the link to view specific steps. +### Step 1: Start a docker container. + +You can mount a folder for the shared huggingface model weights cache. The command below uses `/tmp/huggingface` as an example. 
+ +``` +docker pull nvidia/cuda:12.1.1-devel-ubuntu22.04 +docker run --shm-size 64g -it -v /tmp/huggingface:/hf_home --gpus all nvidia/cuda:12.1.1-devel-ubuntu22.04 /bin/bash ``` -## Start Runner +### Step 2: Configure the runner by `config.sh` + +Run these commands inside the container. -add `/lib/systemd/system/e2e.service` ``` -[Unit] -StartLimitIntervalSec=0 -[Service] -Environment="CUDA_VISIBLE_DEVICES=7" -Environment="XDG_CACHE_HOME=/data/.cache" -Environment="HF_TOKEN=hf_xx" -Environment="OPENAI_API_KEY=sk-xx" -Environment="HOME=/data/zhyncs/runner-v1" -Environment="SGLANG_IS_IN_CI=true" -Restart=always -RestartSec=1 -ExecStart=/data/zhyncs/runner-v1/actions-runner/run.sh -[Install] -WantedBy=multi-user.target +apt update && apt install -y curl python3-pip git +export RUNNER_ALLOW_RUNASROOT=1 ``` -add `/lib/systemd/system/unit.service` +Then follow https://github.com/sgl-project/sglang/settings/actions/runners/new?arch=x64&os=linux to run `config.sh` + +**Notes** +- You do not need to specify the runner group. +- Give it a name (e.g., `test-sgl-gpu-0`) and some labels (e.g., `unit-test`). The labels can be edited later in GitHub Settings. +- You do not need to change the work folder. 
+ +### Step 3: Run the runner by `run.sh` + +- Set up environment variables ``` -[Unit] -StartLimitIntervalSec=0 -[Service] -Environment="CUDA_VISIBLE_DEVICES=6" -Environment="XDG_CACHE_HOME=/data/.cache" -Environment="HF_TOKEN=hf_xx" -Environment="OPENAI_API_KEY=sk-xx" -Environment="HOME=/data/zhyncs/runner-v2" -Environment="SGLANG_IS_IN_CI=true" -Restart=always -RestartSec=1 -ExecStart=/data/zhyncs/runner-v2/actions-runner/run.sh -[Install] -WantedBy=multi-user.target +export HF_HOME=/hf_home +export SGLANG_IS_IN_CI=true +export HF_TOKEN=hf_xxx +export OPENAI_API_KEY=sk-xxx +export CUDA_VISIBLE_DEVICES=0 ``` -add `/lib/systemd/system/accuracy.service` -``` -[Unit] -StartLimitIntervalSec=0 -[Service] -Environment="CUDA_VISIBLE_DEVICES=5" -Environment="XDG_CACHE_HOME=/data/.cache" -Environment="HF_TOKEN=hf_xx" -Environment="OPENAI_API_KEY=sk-xx" -Environment="HOME=/data/zhyncs/runner-v3" -Environment="SGLANG_IS_IN_CI=true" -Restart=always -RestartSec=1 -ExecStart=/data/zhyncs/runner-v3/actions-runner/run.sh -[Install] -WantedBy=multi-user.target -``` - -```bash -cd /data/zhyncs/runner-v1 -python3 -m venv venv - -cd /data/zhyncs/runner-v2 -python3 -m venv venv - -cd /data/zhyncs/runner-v3 -python3 -m venv venv - -sudo systemctl daemon-reload - -sudo systemctl start e2e -sudo systemctl enable e2e -sudo systemctl status e2e - -sudo systemctl start unit -sudo systemctl enable unit -sudo systemctl status unit - -sudo systemctl start accuracy -sudo systemctl enable accuracy -sudo systemctl status accuracy +- Run it forever ``` +while true; do ./run.sh; echo "Restarting..."; sleep 2; done +``` \ No newline at end of file