# Patch summary (commentary placed ahead of the first `diff --git` header).
#
# NOTE(review): this patch text has been flattened onto one line; the original
# line breaks and indentation are not recoverable from this view, so it would
# have to be regenerated from the repository before it could be applied.
#
# Files touched:
#   1. .github/workflows/pr-e2e-test.yml
#      - Trigger: the `push` trigger for `main` is removed; `pull_request`
#        on `main` gains a `paths` filter of "python/sglang/*";
#        `workflow_dispatch` is kept.
#        NOTE(review): in GitHub Actions filter patterns a single `*` does not
#        match `/`, so changes in nested directories under python/sglang/ would
#        presumably not trigger this workflow -- confirm whether
#        "python/sglang/**" was intended.
#      - Server launch: `--disable-radix-cache` is appended to the
#        `sglang.launch_server` command.
#      - Health-check loop: the retry bound goes from 60 to 120 iterations and
#        the failure message is updated to match. Each iteration runs one
#        `curl` against /health and then `sleep 1`.
#        NOTE(review): the message says "120 seconds", but the loop counts
#        attempts; time spent inside `curl` adds to the total wait.
#      - Benchmark: now runs after `cd /home/lmzheng/zhyncs`, with
#        `--dataset-name random --num-prompts 3000 --random-input 256
#        --random-output 512` added to `sglang.bench_serving`.
#      - The trailing `kill -9 $(ps aux | grep ...)` cleanup is unchanged
#        context.
#   2. python/sglang/bench_serving.py
#      - Hunk `@@ -1,5 +1,6 @@` adds one line after the two "Adapted from"
#        header comments; it appears to be a blank line before the module
#        docstring. This hunk continues past the end of the visible text.
diff --git a/.github/workflows/pr-e2e-test.yml b/.github/workflows/pr-e2e-test.yml index 4942e89f0..d73dc4605 100644 --- a/.github/workflows/pr-e2e-test.yml +++ b/.github/workflows/pr-e2e-test.yml @@ -1,10 +1,10 @@ name: PR E2E Test on: - push: - branches: [ main ] pull_request: branches: [ main ] + paths: + - "python/sglang/*" workflow_dispatch: jobs: @@ -26,22 +26,22 @@ jobs: - name: Launch server and run benchmark run: | - python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 & + python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & echo "Waiting for server to start..." - for i in {1..60}; do + for i in {1..120}; do if curl -s http://127.0.0.1:8413/health; then echo "Server is up!" break fi - if [ $i -eq 60 ]; then - echo "Server failed to start within 60 seconds" + if [ $i -eq 120 ]; then + echo "Server failed to start within 120 seconds" exit 1 fi sleep 1 done - python3 -m sglang.bench_serving --backend sglang --port 8413 + cd /home/lmzheng/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 echo "Stopping server..." kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index 3d970d3a9..b52e114fd 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -1,5 +1,6 @@ # Adapted from https://github.com/vllm-project/vllm/blob/6366efc67b0aedd2c1721c14385370e50b297fb3/benchmarks/backend_request_func.py # Adapted from https://github.com/vllm-project/vllm/blob/6366efc67b0aedd2c1721c14385370e50b297fb3/benchmarks/benchmark_serving.py + """ Benchmark online serving.