Improve end-to-end throughput test and its coverage (#1039)

This commit is contained in:
Lianmin Zheng
2024-08-11 18:27:33 -07:00
committed by GitHub
parent 7de6034534
commit 8207637029
14 changed files with 224 additions and 46 deletions

View File

@@ -37,23 +37,16 @@ jobs:
- name: Benchmark Serving Throughput
run: |
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
SERVER_PID=$!
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
echo "Waiting for server to start..."
for i in {1..120}; do
if curl -s http://127.0.0.1:8413/health; then
echo "Server is up!"
break
fi
if [ $i -eq 120 ]; then
echo "Server failed to start within 120 seconds"
exit 1
fi
sleep 1
done
- name: Benchmark Serving Throughput (w/o RadixAttention)
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 500 --random-input 4096 --random-output 2048
- name: Benchmark Serving Throughput (w/o FlashInfer)
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_flashinfer
echo "Stopping server..."
kill -9 $SERVER_PID