[Benchmark] Refactor perf script to use benchmark cli (#1524)

### What this PR does / why we need it? Since the `vllm bench` CLI is now mature enough for production use (it supports more datasets), we no longer need to copy vLLM code; with vLLM installed, we can simply use the benchmark CLI. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-06-30 23:42:04 +08:00
parent 53ec583bbb
commit 6db7dc2c85
5 changed files with 158 additions and 40 deletions
--- a/benchmarks/tests/serving-tests.json
+++ b/benchmarks/tests/serving-tests.json
@@ -18,7 +18,7 @@
    },
    "client_parameters": {
      "model": "Qwen/Qwen2.5-VL-7B-Instruct",
-      "backend": "openai-chat",
+      "endpoint_type": "openai-chat",
      "dataset_name": "hf",
      "hf_split": "train",
      "endpoint": "/v1/chat/completions",
@@ -44,7 +44,7 @@
    },
    "client_parameters": {
      "model": "Qwen/Qwen3-8B",
-      "backend": "vllm",
+      "endpoint_type": "vllm",
      "dataset_name": "sharegpt",
      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
      "num_prompts": 200
@@ -68,7 +68,7 @@
    },
    "client_parameters": {
      "model": "Qwen/Qwen2.5-7B-Instruct",
-      "backend": "vllm",
+      "endpoint_type": "vllm",
      "dataset_name": "sharegpt",
      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
      "num_prompts": 200