[Tests] move qwen3 performance test from nightly to e2e (#5980)

### What this PR does / why we need it? Move the qwen3 performance test from nightly to e2e to intercept performance degradation. - vLLM version: v0.13.0 - vLLM main: 2c24bc6996 --------- Signed-off-by: wxsIcey <1790571317@qq.com>
2026-01-20 17:08:43 +08:00
parent 5892455f43
commit 402872050a
3 changed files with 3 additions and 5 deletions
--- a/tests/e2e/multicard/2-cards/test_qwen3_performance.py
+++ b/tests/e2e/multicard/2-cards/test_qwen3_performance.py
@@ -37,14 +37,14 @@ api_keyword_args = {

 vllm_bench_cases = {
    "dataset-name": "random",
-    "num_prompts": 1000,
+    "num_prompts": 500,
    "request_rate": 20,
    "random_input_len": 128,
    "max_concurrency": 40,
    "random_output_len": 100,
 }

-baseline_throughput = 1622.08  # baseline throughput for Qwen3-8B
+baseline_throughput = 1600.0  # baseline throughput for Qwen3-8B, measured with num_prompts=500


@pytest.mark.parametrize("model", MODELS)