From 4970de42420f6504dabf6d70b3cbfa7881021165 Mon Sep 17 00:00:00 2001 From: wjunLu <135617475+wjunLu@users.noreply.github.com> Date: Thu, 29 Jan 2026 22:41:41 +0800 Subject: [PATCH] [CI] Enable the skipped cases when HDK is upgraded to 25.5.0 (#6195) ### What this PR does / why we need it? Enable the tests that were skipped due to an outdated driver version: - tests/e2e/multicard/4-cards/test_kimi_k2.py - tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py - tests/e2e/multicard/4-cards/long_sequence/test_basic.py - tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py and some previously skipped cases in: - tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py - tests/e2e/multicard/2-cards/test_external_launcher.py - tests/e2e/multicard/2-cards/test_offline_weight_load.py - tests/e2e/multicard/2-cards/test_quantization.py - tests/e2e/multicard/4-cards/test_data_parallel_tp2.py TODO: - tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py (this patch only removes its `@pytest.mark.skip` decorator) - tests/e2e/multicard/4-cards/long_sequence/test_mtp.py (still marked `is_skipped: true` in config.yaml) ### Does this PR introduce _any_ user-facing change? No. Only the CI configuration and e2e test skip markers are changed. ### How was this patch tested?
- vLLM version: v0.14.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60 Signed-off-by: wjunLu --- .github/workflows/scripts/config.yaml | 5 ----- tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py | 1 - tests/e2e/multicard/2-cards/test_external_launcher.py | 1 - tests/e2e/multicard/2-cards/test_offline_weight_load.py | 1 - tests/e2e/multicard/2-cards/test_quantization.py | 1 - .../e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py | 1 - tests/e2e/multicard/4-cards/test_data_parallel_tp2.py | 1 - 7 files changed, 11 deletions(-) diff --git a/.github/workflows/scripts/config.yaml b/.github/workflows/scripts/config.yaml index 91d180c0..0509c4cf 100644 --- a/.github/workflows/scripts/config.yaml +++ b/.github/workflows/scripts/config.yaml @@ -131,19 +131,14 @@ e2e-multicard-4-cards: estimated_time: 1250 - name: tests/e2e/multicard/4-cards/test_data_parallel_tp2.py estimated_time: 60 - is_skipped: true - name: tests/e2e/multicard/4-cards/test_kimi_k2.py estimated_time: 100 - is_skipped: true - name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py estimated_time: 60 - is_skipped: true - name: tests/e2e/multicard/4-cards/long_sequence/test_basic.py estimated_time: 60 - is_skipped: true - name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py estimated_time: 60 - is_skipped: true - name: tests/e2e/multicard/4-cards/long_sequence/test_mtp.py estimated_time: 60 is_skipped: true diff --git a/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py b/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py index a7ed6baa..89760a4e 100644 --- a/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py +++ b/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py @@ -48,7 +48,6 @@ BASELINES_SP = { } -@pytest.mark.skip(reason="Failed with CANN8.5, fix me") @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"}) @pytest.mark.parametrize("method", ["eagle3"]) 
@pytest.mark.parametrize("num_speculative_tokens", [3]) diff --git a/tests/e2e/multicard/2-cards/test_external_launcher.py b/tests/e2e/multicard/2-cards/test_external_launcher.py index f8b59e38..ef2da648 100644 --- a/tests/e2e/multicard/2-cards/test_external_launcher.py +++ b/tests/e2e/multicard/2-cards/test_external_launcher.py @@ -78,7 +78,6 @@ def test_qwen3_external_launcher(model): assert proc.returncode == 0 -@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me") @pytest.mark.parametrize("model", MOE_MODELS) def test_qwen3_moe_external_launcher_ep_tp2(model): script = Path( diff --git a/tests/e2e/multicard/2-cards/test_offline_weight_load.py b/tests/e2e/multicard/2-cards/test_offline_weight_load.py index d94fa322..6d6961b0 100644 --- a/tests/e2e/multicard/2-cards/test_offline_weight_load.py +++ b/tests/e2e/multicard/2-cards/test_offline_weight_load.py @@ -29,7 +29,6 @@ import pytest MODELS = ["Qwen/Qwen3-30B-A3B"] -@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me") @pytest.mark.parametrize("model", MODELS) @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_qwen3_offline_load_and_sleepmode_tp2(model): diff --git a/tests/e2e/multicard/2-cards/test_quantization.py b/tests/e2e/multicard/2-cards/test_quantization.py index 245693ff..b356ba3b 100644 --- a/tests/e2e/multicard/2-cards/test_quantization.py +++ b/tests/e2e/multicard/2-cards/test_quantization.py @@ -44,7 +44,6 @@ def test_qwen2_5_w8a8_external_quantized_tp2(): print(f"Generated text: {vllm_output[i][1]!r}") -@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me") def test_qwen3_moe_w8a8_dynamic_llm_compressor(): example_prompts = [ "The president of the United States is", diff --git a/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py b/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py index 6b2c69a5..709bb3e6 100644 --- a/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py +++ 
b/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py @@ -34,7 +34,6 @@ os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" MODELS = ["Qwen/Qwen3-Next-80B-A3B-Instruct"] -@pytest.mark.skip(reason="Failed with CANN8.5, fix me") @pytest.mark.parametrize("model_name", MODELS) def test_qwen3_next_mtp_acceptance_tp4(model_name): golden = [0.85, 0.46, 0.19] diff --git a/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py b/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py index 993cab9e..0aec68ca 100644 --- a/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py +++ b/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py @@ -8,7 +8,6 @@ import pytest MODELS = ["Qwen/Qwen3-30B-A3B"] -@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_tokens", [32]) @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3"})