From 2967e5e22ab2e12888464c01100b2ddeed34720a Mon Sep 17 00:00:00 2001
From: Li Wang <wangli858794774@gmail.com>
Date: Sun, 7 Sep 2025 10:36:34 +0800
Subject: [PATCH] [Benchmark] Correctly kill vllm process in performance
 benchmark (#2782)

### What this PR does / why we need it?
Since https://github.com/vllm-project/vllm/pull/21445, vLLM names its
processes with a `VLLM` prefix. We should kill the processes matching that
name after each benchmark iteration to avoid OOM issues.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.10.1.1
- vLLM main:
https://github.com/vllm-project/vllm/commit/e599e2c65ee32abcc986733ab0a55becea158bb4

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
---
 benchmarks/scripts/run-performance-benchmarks.sh | 4 +++-
 benchmarks/tests/serving-tests.json              | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh
index b604fe9..befdf69 100644
--- a/benchmarks/scripts/run-performance-benchmarks.sh
+++ b/benchmarks/scripts/run-performance-benchmarks.sh
@@ -78,7 +78,9 @@ kill_npu_processes() {
   ps -aux
   lsof -t -i:8000 | xargs -r kill -9
   pgrep python3 | xargs -r kill -9
-  
+  # vLLM now names the process with VLLM prefix after https://github.com/vllm-project/vllm/pull/21445
+  pgrep VLLM | xargs -r kill -9
+
   sleep 4
   rm -rf ~/.config/vllm
 
diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json
index 6398710..c2be9eb 100644
--- a/benchmarks/tests/serving-tests.json
+++ b/benchmarks/tests/serving-tests.json
@@ -23,7 +23,8 @@
       "hf_split": "train",
       "endpoint": "/v1/chat/completions",
       "dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
-      "num_prompts": 200
+      "num_prompts": 200,
+      "no_stream": ""
     }
   },
   {