From 402872050a8451c3de57f4c8d75f5c4b735e1351 Mon Sep 17 00:00:00 2001
From: Icey <1790571317@qq.com>
Date: Tue, 20 Jan 2026 17:08:43 +0800
Subject: [PATCH] [Tests] move qwen3 performance test from nightly to e2e
 (#5980)

### What this PR does / why we need it?
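Move the Qwen3-8B performance test from the nightly workflow
(`nightly_test_a2.yaml`) into the 2-card multicard e2e suite, which runs
when the e2e workflow is invoked with `type == 'full'`, so that
performance regressions are caught by the e2e run instead of waiting
for the nightly job.

As part of the move, the benchmark is shortened from 1000 to 500 prompts
and the baseline throughput is updated from 1622.08 to 1600.0 (measured
with `num_prompts=500`).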

- vLLM version: v0.13.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060

---------

Signed-off-by: wxsIcey <1790571317@qq.com>
---
 .github/workflows/_e2e_test.yaml                              | 1 +
 .github/workflows/nightly_test_a2.yaml                        | 3 ---
 .../2-cards/test_qwen3_performance.py}                        | 4 ++--
 3 files changed, 3 insertions(+), 5 deletions(-)
 rename tests/e2e/{nightly/single_node/models/test_qwen3_8b.py => multicard/2-cards/test_qwen3_performance.py} (95%)

diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 17fb04ff..96ae9578 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -222,6 +222,7 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         if: ${{ inputs.type == 'full' }}
         run: |
+          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_performance.py
           pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_data_parallel.py
           pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_expert_parallel.py
           pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_external_launcher.py
diff --git a/.github/workflows/nightly_test_a2.yaml b/.github/workflows/nightly_test_a2.yaml
index 736fcb27..b772b300 100644
--- a/.github/workflows/nightly_test_a2.yaml
+++ b/.github/workflows/nightly_test_a2.yaml
@@ -49,9 +49,6 @@ jobs:
       fail-fast: false
       matrix:
         test_config:
-          - name: qwen3-8b
-            os: linux-aarch64-a2-1
-            tests: tests/e2e/nightly/single_node/models/test_qwen3_8b.py
           - name: qwen3next
             os: linux-aarch64-a2-4
             tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
diff --git a/tests/e2e/nightly/single_node/models/test_qwen3_8b.py b/tests/e2e/multicard/2-cards/test_qwen3_performance.py
similarity index 95%
rename from tests/e2e/nightly/single_node/models/test_qwen3_8b.py
rename to tests/e2e/multicard/2-cards/test_qwen3_performance.py
index 0f0ae383..e8a6e51e 100644
--- a/tests/e2e/nightly/single_node/models/test_qwen3_8b.py
+++ b/tests/e2e/multicard/2-cards/test_qwen3_performance.py
@@ -37,14 +37,14 @@ api_keyword_args = {
 
 vllm_bench_cases = {
     "dataset-name": "random",
-    "num_prompts": 1000,
+    "num_prompts": 500,
     "request_rate": 20,
     "random_input_len": 128,
     "max_concurrency": 40,
     "random_output_len": 100,
 }
 
-baseline_throughput = 1622.08  # baseline throughput for Qwen3-8B
+baseline_throughput = 1600.0  # baseline throughput for Qwen3-8B, measured with num_prompts=500
 
 
 @pytest.mark.parametrize("model", MODELS)