[Benchmark] Refactor perf script to use benchmark cli (#1524)

### What this PR does / why we need it? Since the `vllm bench` CLI is now mature enough for production use (it supports more datasets), we no longer need to copy the vLLM benchmark code; with vllm installed, we can simply use the benchmark CLI directly. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-06-30 23:42:04 +08:00
parent 53ec583bbb
commit 6db7dc2c85
5 changed files with 158 additions and 40 deletions
--- a/benchmarks/scripts/patch_benchmark_dataset.py
+++ b/benchmarks/scripts/patch_benchmark_dataset.py
@@ -1,3 +1,4 @@
+import os
 from argparse import ArgumentParser

 import libcst as cst
@@ -44,16 +45,22 @@ class StreamingFalseTransformer(cst.CSTTransformer):


 def patch_file(path):
-    with open(path, "r", encoding="utf-8") as f:
+    abs_path = os.path.abspath(path)
+
+    if not os.path.exists(abs_path):
+        print(f"File not found: {abs_path}")
+        return
+
+    with open(abs_path, "r", encoding="utf-8") as f:
        source = f.read()

    module = cst.parse_module(source)
    modified = module.visit(StreamingFalseTransformer())

-    with open(path, "w", encoding="utf-8") as f:
+    with open(abs_path, "w", encoding="utf-8") as f:
        f.write(modified.code)

-    print(f"Patched: {path}")
+    print(f"Patched: {abs_path}")


 if __name__ == '__main__':
@@ -61,8 +68,10 @@ if __name__ == '__main__':
        description=
        "Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
    )
-    parser.add_argument("--path",
-                        type=str,
-                        help="Path to the benchmark_dataset.py file")
+    parser.add_argument(
+        "--path",
+        type=str,
+        default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
+        help="Path to the benchmark_dataset.py file")
    args = parser.parse_args()
    patch_file(args.path)
--- a/benchmarks/scripts/run-performance-benchmarks.sh
+++ b/benchmarks/scripts/run-performance-benchmarks.sh
@@ -54,13 +54,20 @@ json2args() {
 }

 wait_for_server() {
-  # wait for vllm server to start
-  # return 1 if vllm server crashes
-  timeout 1200 bash -c '
-    until curl -s -X GET localhost:8000/health; do
-      echo "Waiting for vllm server to start..."
-      sleep 1
-    done' && return 0 || return 1
+  local waited=0
+  local timeout_sec=1200
+
+  while (( waited < timeout_sec )); do
+    if curl -s -X GET localhost:8000/health > /dev/null; then
+      return 0
+    fi
+    echo "Waiting for vllm server to start..."
+    sleep 1
+    ((waited++))
+  done
+
+  echo "Timeout waiting for server"
+  return 1
 }

 get_cur_npu_id() {
@@ -114,7 +121,7 @@ run_latency_tests() {
    latency_params=$(echo "$params" | jq -r '.parameters')
    latency_args=$(json2args "$latency_params")

-    latency_command="python3 vllm_benchmarks/benchmark_latency.py \
+    latency_command="vllm bench latency \
      --output-json $RESULTS_FOLDER/${test_name}.json \
      $latency_args"

@@ -157,7 +164,7 @@ run_throughput_tests() {
    throughput_params=$(echo "$params" | jq -r '.parameters')
    throughput_args=$(json2args "$throughput_params")

-    throughput_command="python3 vllm_benchmarks/benchmark_throughput.py \
+    throughput_command="vllm bench throughput \
      --output-json $RESULTS_FOLDER/${test_name}.json \
      $throughput_args"

@@ -243,7 +250,7 @@ run_serving_tests() {

      new_test_name=$test_name"_qps_"$qps

-      client_command="python3 vllm_benchmarks/benchmark_serving.py \
+      client_command="vllm bench serve \
        --save-result \
        --result-dir $RESULTS_FOLDER \
        --result-filename ${new_test_name}.json \
@@ -271,17 +278,11 @@ cleanup_on_error() {
  rm -rf $RESULTS_FOLDER
 }

-get_benchmarks_scripts() {
-  git clone -b main --depth=1 https://github.com/vllm-project/vllm.git && \
-  mv vllm/benchmarks vllm_benchmarks
-  rm -rf ./vllm
-}
-
 main() {
-
  START_TIME=$(date +%s)
  check_npus
-
+  python3 benchmarks/scripts/patch_benchmark_dataset.py
+  
  # dependencies
  (which wget && which curl) || (apt-get update && apt-get install -y wget curl)
  (which jq) || (apt-get update && apt-get -y install jq)
@@ -298,8 +299,6 @@ main() {

  # prepare for benchmarking
  cd benchmarks || exit 1
-  get_benchmarks_scripts
-  python3 scripts/patch_benchmark_dataset.py --path vllm_benchmarks/benchmark_dataset.py
  trap cleanup EXIT

  QUICK_BENCHMARK_ROOT=./