diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index b82bbdc36..6e8984763 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: e2e-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: self-hosted + runs-on: bench steps: - name: Checkout code @@ -38,6 +38,7 @@ jobs: - name: Benchmark Serving Throughput run: | python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & + SERVER_PID=$! echo "Waiting for server to start..." for i in {1..120}; do @@ -52,7 +53,7 @@ jobs: sleep 1 done - cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 + cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 500 --random-input 4096 --random-output 2048 echo "Stopping server..." - kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -- "--port 8413" | grep -v grep | awk '{print $2}') + kill -9 $SERVER_PID diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index e43caf5f0..8d4ddcdb7 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: self-hosted + runs-on: unit steps: - name: Checkout code diff --git a/python/sglang/README.md b/python/sglang/README.md index c873e1d63..c92144254 100644 --- a/python/sglang/README.md +++ b/python/sglang/README.md @@ -1,6 +1,5 @@ # Code Structures - - `lang`: The frontend language. - `srt`: The backend engine for running local models. (SRT = SGLang Runtime). - `test`: Test utilities.