diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index b604fe9..befdf69 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -78,7 +78,9 @@ kill_npu_processes() { ps -aux lsof -t -i:8000 | xargs -r kill -9 pgrep python3 | xargs -r kill -9 - + # vLLM now names the process with VLLM prefix after https://github.com/vllm-project/vllm/pull/21445 + pgrep VLLM | xargs -r kill -9 + sleep 4 rm -rf ~/.config/vllm diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json index 6398710..c2be9eb 100644 --- a/benchmarks/tests/serving-tests.json +++ b/benchmarks/tests/serving-tests.json @@ -23,7 +23,8 @@ "hf_split": "train", "endpoint": "/v1/chat/completions", "dataset_path": "lmarena-ai/vision-arena-bench-v0.1", - "num_prompts": 200 + "num_prompts": 200, + "no_stream": "" } }, {