[Tests] move qwen3 performance test from nightly to e2e (#5980)
### What this PR does / why we need it?
Move the Qwen3 performance test from the nightly workflow to the e2e
workflow so that performance degradation is caught by the e2e run.
- vLLM version: v0.13.0
- vLLM main: 2c24bc6996
---------
Signed-off-by: wxsIcey <1790571317@qq.com>
This commit is contained in:
1
.github/workflows/_e2e_test.yaml
vendored
1
.github/workflows/_e2e_test.yaml
vendored
@@ -222,6 +222,7 @@ jobs:
|
|||||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||||
if: ${{ inputs.type == 'full' }}
|
if: ${{ inputs.type == 'full' }}
|
||||||
run: |
|
run: |
|
||||||
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_performance.py
|
||||||
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_data_parallel.py
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_data_parallel.py
|
||||||
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_expert_parallel.py
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_expert_parallel.py
|
||||||
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_external_launcher.py
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_external_launcher.py
|
||||||
|
|||||||
3
.github/workflows/nightly_test_a2.yaml
vendored
3
.github/workflows/nightly_test_a2.yaml
vendored
@@ -49,9 +49,6 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
test_config:
|
test_config:
|
||||||
- name: qwen3-8b
|
|
||||||
os: linux-aarch64-a2-1
|
|
||||||
tests: tests/e2e/nightly/single_node/models/test_qwen3_8b.py
|
|
||||||
- name: qwen3next
|
- name: qwen3next
|
||||||
os: linux-aarch64-a2-4
|
os: linux-aarch64-a2-4
|
||||||
tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
|
tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
|
||||||
|
|||||||
@@ -37,14 +37,14 @@ api_keyword_args = {
|
|||||||
|
|
||||||
vllm_bench_cases = {
|
vllm_bench_cases = {
|
||||||
"dataset-name": "random",
|
"dataset-name": "random",
|
||||||
"num_prompts": 1000,
|
"num_prompts": 500,
|
||||||
"request_rate": 20,
|
"request_rate": 20,
|
||||||
"random_input_len": 128,
|
"random_input_len": 128,
|
||||||
"max_concurrency": 40,
|
"max_concurrency": 40,
|
||||||
"random_output_len": 100,
|
"random_output_len": 100,
|
||||||
}
|
}
|
||||||
|
|
||||||
baseline_throughput = 1622.08 # baseline throughput for Qwen3-8B
|
baseline_throughput = 1600.0 # baseline throughput for Qwen3-8B, measured with num_prompts=500
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", MODELS)
|
@pytest.mark.parametrize("model", MODELS)
|
||||||
Reference in New Issue
Block a user