From 402872050a8451c3de57f4c8d75f5c4b735e1351 Mon Sep 17 00:00:00 2001 From: Icey <1790571317@qq.com> Date: Tue, 20 Jan 2026 17:08:43 +0800 Subject: [PATCH] [Tests] move qwen3 performance test from nightly to e2e (#5980) ### What this PR does / why we need it? Move the qwen3 performance test from the nightly suite to the full e2e suite so that performance regressions are caught in the e2e run instead of only in the nightly run. The benchmark is also shortened (`num_prompts` 1000 -> 500) and the baseline throughput is reset from 1622.08 to 1600.0 to match. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060 --------- Signed-off-by: wxsIcey <1790571317@qq.com> --- .github/workflows/_e2e_test.yaml | 1 + .github/workflows/nightly_test_a2.yaml | 3 --- .../2-cards/test_qwen3_performance.py} | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) rename tests/e2e/{nightly/single_node/models/test_qwen3_8b.py => multicard/2-cards/test_qwen3_performance.py} (95%) diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 17fb04ff..96ae9578 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -222,6 +222,7 @@ jobs: VLLM_WORKER_MULTIPROC_METHOD: spawn if: ${{ inputs.type == 'full' }} run: | + pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_performance.py pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_data_parallel.py pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_expert_parallel.py pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_external_launcher.py diff --git a/.github/workflows/nightly_test_a2.yaml b/.github/workflows/nightly_test_a2.yaml index 736fcb27..b772b300 100644 --- a/.github/workflows/nightly_test_a2.yaml +++ b/.github/workflows/nightly_test_a2.yaml @@ -49,9 +49,6 @@ jobs: fail-fast: false matrix: test_config: - - name: qwen3-8b - os: linux-aarch64-a2-1 - tests: tests/e2e/nightly/single_node/models/test_qwen3_8b.py - name: qwen3next os: linux-aarch64-a2-4 tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py diff --git a/tests/e2e/nightly/single_node/models/test_qwen3_8b.py 
b/tests/e2e/multicard/2-cards/test_qwen3_performance.py similarity index 95% rename from tests/e2e/nightly/single_node/models/test_qwen3_8b.py rename to tests/e2e/multicard/2-cards/test_qwen3_performance.py index 0f0ae383..e8a6e51e 100644 --- a/tests/e2e/nightly/single_node/models/test_qwen3_8b.py +++ b/tests/e2e/multicard/2-cards/test_qwen3_performance.py @@ -37,14 +37,14 @@ api_keyword_args = { vllm_bench_cases = { "dataset-name": "random", - "num_prompts": 1000, + "num_prompts": 500, "request_rate": 20, "random_input_len": 128, "max_concurrency": 40, "random_output_len": 100, } -baseline_throughput = 1622.08 # baseline throughput for Qwen3-8B +baseline_throughput = 1600.0 # baseline throughput for Qwen3-8B, measured with num_prompts=500 @pytest.mark.parametrize("model", MODELS)