From 6c498313942b32e548dd0b499f279db0abc5b085 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 28 Aug 2024 04:20:54 -0700
Subject: [PATCH] Add sglang.bench_latency to CI (#1243)

---
Notes (ignored by git-am; not part of the commit message):
  * Adds one CI step that runs `python3 -m sglang.bench_latency` with
    batch size 1, input 128, output 8, under a 10-minute step timeout.
  * NOTE(review): the YAML indentation in the hunk below was reconstructed
    from a whitespace-collapsed copy of this patch -- confirm it matches the
    target workflow file before applying.

 .github/workflows/e2e-test.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
index 7f555110d..11c94775c 100644
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -38,6 +38,11 @@ jobs:
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
 
+      - name: Benchmark Serving Latency
+        timeout-minutes: 10
+        run: |
+          python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
+
       - name: Benchmark Serving Throughput (w/o RadixAttention)
         timeout-minutes: 10
         run: |