From 4970de42420f6504dabf6d70b3cbfa7881021165 Mon Sep 17 00:00:00 2001
From: wjunLu <135617475+wjunLu@users.noreply.github.com>
Date: Thu, 29 Jan 2026 22:41:41 +0800
Subject: [PATCH] [CI] Enable the skipped cases when HDK is upgraded to 25.5.0
 (#6195)

### What this PR does / why we need it?
Enable the tests that were skipped due to an outdated driver version:
- tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
- tests/e2e/multicard/4-cards/long_sequence/test_basic.py
- tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py
- tests/e2e/multicard/4-cards/test_kimi_k2.py

and re-enable the individually skipped test cases in:
- tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
- tests/e2e/multicard/2-cards/test_external_launcher.py
- tests/e2e/multicard/2-cards/test_offline_weight_load.py
- tests/e2e/multicard/2-cards/test_quantization.py
- tests/e2e/multicard/4-cards/test_data_parallel_tp2.py

TODO:
- tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
- tests/e2e/multicard/4-cards/long_sequence/test_mtp.py

### Does this PR introduce _any_ user-facing change?

No. Only the CI configuration and e2e test skip markers are changed.

### How was this patch tested?

- vLLM version: v0.14.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60

Signed-off-by: wjunLu <wjunlu217@gmail.com>
---
 .github/workflows/scripts/config.yaml                        | 5 -----
 tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py  | 1 -
 tests/e2e/multicard/2-cards/test_external_launcher.py        | 1 -
 tests/e2e/multicard/2-cards/test_offline_weight_load.py      | 1 -
 tests/e2e/multicard/2-cards/test_quantization.py             | 1 -
 .../e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py | 1 -
 tests/e2e/multicard/4-cards/test_data_parallel_tp2.py        | 1 -
 7 files changed, 11 deletions(-)

diff --git a/.github/workflows/scripts/config.yaml b/.github/workflows/scripts/config.yaml
index 91d180c0..0509c4cf 100644
--- a/.github/workflows/scripts/config.yaml
+++ b/.github/workflows/scripts/config.yaml
@@ -131,19 +131,14 @@ e2e-multicard-4-cards:
     estimated_time: 1250
   - name: tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
     estimated_time: 60
-    is_skipped: true
   - name: tests/e2e/multicard/4-cards/test_kimi_k2.py
     estimated_time: 100
-    is_skipped: true
   - name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
     estimated_time: 60
-    is_skipped: true
   - name: tests/e2e/multicard/4-cards/long_sequence/test_basic.py
     estimated_time: 60
-    is_skipped: true
   - name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py
     estimated_time: 60
-    is_skipped: true
   - name: tests/e2e/multicard/4-cards/long_sequence/test_mtp.py
     estimated_time: 60
     is_skipped: true
diff --git a/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py b/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
index a7ed6baa..89760a4e 100644
--- a/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
+++ b/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
@@ -48,7 +48,6 @@ BASELINES_SP = {
 }
 
 
-@pytest.mark.skip(reason="Failed with CANN8.5, fix me")
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
 @pytest.mark.parametrize("method", ["eagle3"])
 @pytest.mark.parametrize("num_speculative_tokens", [3])
diff --git a/tests/e2e/multicard/2-cards/test_external_launcher.py b/tests/e2e/multicard/2-cards/test_external_launcher.py
index f8b59e38..ef2da648 100644
--- a/tests/e2e/multicard/2-cards/test_external_launcher.py
+++ b/tests/e2e/multicard/2-cards/test_external_launcher.py
@@ -78,7 +78,6 @@ def test_qwen3_external_launcher(model):
     assert proc.returncode == 0
 
 
-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
 @pytest.mark.parametrize("model", MOE_MODELS)
 def test_qwen3_moe_external_launcher_ep_tp2(model):
     script = Path(
diff --git a/tests/e2e/multicard/2-cards/test_offline_weight_load.py b/tests/e2e/multicard/2-cards/test_offline_weight_load.py
index d94fa322..6d6961b0 100644
--- a/tests/e2e/multicard/2-cards/test_offline_weight_load.py
+++ b/tests/e2e/multicard/2-cards/test_offline_weight_load.py
@@ -29,7 +29,6 @@ import pytest
 MODELS = ["Qwen/Qwen3-30B-A3B"]
 
 
-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
 @pytest.mark.parametrize("model", MODELS)
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
 def test_qwen3_offline_load_and_sleepmode_tp2(model):
diff --git a/tests/e2e/multicard/2-cards/test_quantization.py b/tests/e2e/multicard/2-cards/test_quantization.py
index 245693ff..b356ba3b 100644
--- a/tests/e2e/multicard/2-cards/test_quantization.py
+++ b/tests/e2e/multicard/2-cards/test_quantization.py
@@ -44,7 +44,6 @@ def test_qwen2_5_w8a8_external_quantized_tp2():
         print(f"Generated text: {vllm_output[i][1]!r}")
 
 
-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
 def test_qwen3_moe_w8a8_dynamic_llm_compressor():
     example_prompts = [
         "The president of the United States is",
diff --git a/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py b/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
index 6b2c69a5..709bb3e6 100644
--- a/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
+++ b/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
@@ -34,7 +34,6 @@ os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 MODELS = ["Qwen/Qwen3-Next-80B-A3B-Instruct"]
 
 
-@pytest.mark.skip(reason="Failed with CANN8.5, fix me")
 @pytest.mark.parametrize("model_name", MODELS)
 def test_qwen3_next_mtp_acceptance_tp4(model_name):
     golden = [0.85, 0.46, 0.19]
diff --git a/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py b/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
index 993cab9e..0aec68ca 100644
--- a/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
+++ b/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
@@ -8,7 +8,6 @@ import pytest
 MODELS = ["Qwen/Qwen3-30B-A3B"]
 
 
-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3"})