[CI] Enable the skipped cases when HDK is upgraded to 25.5.0 (#6195)

### What this PR does / why we need it?

Enable the tests that were skipped due to an outdated driver version:

- tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
- tests/e2e/multicard/4-cards/long_sequence/test_basic.py
- tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py

and some cases in

- tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
- tests/e2e/multicard/2-cards/test_external_launcher.py
- tests/e2e/multicard/2-cards/test_offline_weight_load.py
- tests/e2e/multicard/2-cards/test_quantization.py
- tests/e2e/multicard/4-cards/test_data_parallel_tp2.py

TODO:

- tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
- tests/e2e/multicard/4-cards/long_sequence/test_mtp.py

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.14.0
- vLLM main: d68209402d

Signed-off-by: wjunLu <wjunlu217@gmail.com>
2026-01-29 22:41:41 +08:00
parent e35f304419
commit 4970de4242
7 changed files with 0 additions and 11 deletions
--- a/.github/workflows/scripts/config.yaml
+++ b/.github/workflows/scripts/config.yaml
@@ -131,19 +131,14 @@ e2e-multicard-4-cards:
    estimated_time: 1250
  - name: tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
    estimated_time: 60
-    is_skipped: true
  - name: tests/e2e/multicard/4-cards/test_kimi_k2.py
    estimated_time: 100
-    is_skipped: true
  - name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
    estimated_time: 60
-    is_skipped: true
  - name: tests/e2e/multicard/4-cards/long_sequence/test_basic.py
    estimated_time: 60
-    is_skipped: true
  - name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py
    estimated_time: 60
-    is_skipped: true
  - name: tests/e2e/multicard/4-cards/long_sequence/test_mtp.py
    estimated_time: 60
    is_skipped: true
--- a/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
+++ b/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
@@ -48,7 +48,6 @@ BASELINES_SP = {
 }


-@pytest.mark.skip(reason="Failed with CANN8.5, fix me")
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
@pytest.mark.parametrize("method", ["eagle3"])
@pytest.mark.parametrize("num_speculative_tokens", [3])
--- a/tests/e2e/multicard/2-cards/test_external_launcher.py
+++ b/tests/e2e/multicard/2-cards/test_external_launcher.py
@@ -78,7 +78,6 @@ def test_qwen3_external_launcher(model):
    assert proc.returncode == 0


-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
@pytest.mark.parametrize("model", MOE_MODELS)
 def test_qwen3_moe_external_launcher_ep_tp2(model):
    script = Path(
--- a/tests/e2e/multicard/2-cards/test_offline_weight_load.py
+++ b/tests/e2e/multicard/2-cards/test_offline_weight_load.py
@@ -29,7 +29,6 @@ import pytest
 MODELS = ["Qwen/Qwen3-30B-A3B"]


-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
@pytest.mark.parametrize("model", MODELS)
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
 def test_qwen3_offline_load_and_sleepmode_tp2(model):
--- a/tests/e2e/multicard/2-cards/test_quantization.py
+++ b/tests/e2e/multicard/2-cards/test_quantization.py
@@ -44,7 +44,6 @@ def test_qwen2_5_w8a8_external_quantized_tp2():
        print(f"Generated text: {vllm_output[i][1]!r}")


-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
 def test_qwen3_moe_w8a8_dynamic_llm_compressor():
    example_prompts = [
        "The president of the United States is",
--- a/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
+++ b/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
@@ -34,7 +34,6 @@ os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 MODELS = ["Qwen/Qwen3-Next-80B-A3B-Instruct"]


-@pytest.mark.skip(reason="Failed with CANN8.5, fix me")
@pytest.mark.parametrize("model_name", MODELS)
 def test_qwen3_next_mtp_acceptance_tp4(model_name):
    golden = [0.85, 0.46, 0.19]
--- a/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
+++ b/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
@@ -8,7 +8,6 @@ import pytest
 MODELS = ["Qwen/Qwen3-30B-A3B"]


-@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [32])
@patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3"})