Improve end-to-end throughput test and its coverage (#1039)

2024-08-11 18:27:33 -07:00
parent 7de6034534
commit 8207637029
14 changed files with 224 additions and 46 deletions
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -37,23 +37,16 @@ jobs:

    - name: Benchmark Serving Throughput
      run: |
-        python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
-        SERVER_PID=$!
+        cd test/srt
+        python3 -m unittest test_serving_throughput.TestServingThroughput.test_default

-        echo "Waiting for server to start..."
-        for i in {1..120}; do
-          if curl -s http://127.0.0.1:8413/health; then
-            echo "Server is up!"
-            break
-          fi
-          if [ $i -eq 120 ]; then
-            echo "Server failed to start within 120 seconds"
-            exit 1
-          fi
-          sleep 1
-        done
+    - name: Benchmark Serving Throughput (w/o RadixAttention)
+      run: |
+        cd test/srt
+        python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache

-        cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 500 --random-input 4096 --random-output 2048
+    - name: Benchmark Serving Throughput (w/o FlashInfer)
+      run: |
+        cd test/srt
+        python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_flashinfer

-        echo "Stopping server..."
-        kill -9 $SERVER_PID