Improve end-to-end throughput test and its coverage (#1039)
This commit is contained in:
27
.github/workflows/e2e-test.yml
vendored
27
.github/workflows/e2e-test.yml
vendored
@@ -37,23 +37,16 @@ jobs:
|
||||
|
||||
- name: Benchmark Serving Throughput
|
||||
run: |
|
||||
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
|
||||
SERVER_PID=$!
|
||||
cd test/srt
|
||||
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
|
||||
|
||||
echo "Waiting for server to start..."
|
||||
for i in {1..120}; do
|
||||
if curl -s http://127.0.0.1:8413/health; then
|
||||
echo "Server is up!"
|
||||
break
|
||||
fi
|
||||
if [ $i -eq 120 ]; then
|
||||
echo "Server failed to start within 120 seconds"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
- name: Benchmark Serving Throughput (w/o RadixAttention)
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
||||
|
||||
cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 500 --random-input 4096 --random-output 2048
|
||||
- name: Benchmark Serving Throughput (w/o FlashInfer)
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_flashinfer
|
||||
|
||||
echo "Stopping server..."
|
||||
kill -9 $SERVER_PID
|
||||
|
||||
Reference in New Issue
Block a user