[CI] Enable the skipped cases when HDK is upgraded to 25.5.0 (#6195)

### What this PR does / why we need it?
Enable the tests that were skipped due to an outdated driver version:
- tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
- tests/e2e/multicard/4-cards/long_sequence/test_basic.py
- tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py

and some cases in:
- tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
- tests/e2e/multicard/2-cards/test_external_launcher.py
- tests/e2e/multicard/2-cards/test_offline_weight_load.py
- tests/e2e/multicard/2-cards/test_quantization.py
- tests/e2e/multicard/4-cards/test_data_parallel_tp2.py

TODO:
- tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
- tests/e2e/multicard/4-cards/long_sequence/test_mtp.py

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.14.0
- vLLM main: d68209402d

Signed-off-by: wjunLu <wjunlu217@gmail.com>
This commit is contained in:
wjunLu
2026-01-29 22:41:41 +08:00
committed by GitHub
parent e35f304419
commit 4970de4242
7 changed files with 0 additions and 11 deletions

View File

@@ -131,19 +131,14 @@ e2e-multicard-4-cards:
estimated_time: 1250
- name: tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
estimated_time: 60
is_skipped: true
- name: tests/e2e/multicard/4-cards/test_kimi_k2.py
estimated_time: 100
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
estimated_time: 60
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_basic.py
estimated_time: 60
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py
estimated_time: 60
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_mtp.py
estimated_time: 60
is_skipped: true

View File

@@ -48,7 +48,6 @@ BASELINES_SP = {
}
@pytest.mark.skip(reason="Failed with CANN8.5, fix me")
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
@pytest.mark.parametrize("method", ["eagle3"])
@pytest.mark.parametrize("num_speculative_tokens", [3])

View File

@@ -78,7 +78,6 @@ def test_qwen3_external_launcher(model):
assert proc.returncode == 0
@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
@pytest.mark.parametrize("model", MOE_MODELS)
def test_qwen3_moe_external_launcher_ep_tp2(model):
script = Path(

View File

@@ -29,7 +29,6 @@ import pytest
MODELS = ["Qwen/Qwen3-30B-A3B"]
@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
@pytest.mark.parametrize("model", MODELS)
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
def test_qwen3_offline_load_and_sleepmode_tp2(model):

View File

@@ -44,7 +44,6 @@ def test_qwen2_5_w8a8_external_quantized_tp2():
print(f"Generated text: {vllm_output[i][1]!r}")
@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
def test_qwen3_moe_w8a8_dynamic_llm_compressor():
example_prompts = [
"The president of the United States is",

View File

@@ -34,7 +34,6 @@ os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
MODELS = ["Qwen/Qwen3-Next-80B-A3B-Instruct"]
@pytest.mark.skip(reason="Failed with CANN8.5, fix me")
@pytest.mark.parametrize("model_name", MODELS)
def test_qwen3_next_mtp_acceptance_tp4(model_name):
golden = [0.85, 0.46, 0.19]

View File

@@ -8,7 +8,6 @@ import pytest
MODELS = ["Qwen/Qwen3-30B-A3B"]
@pytest.mark.skip(reason="CANN8.5 failed, capture stream failed, fix me")
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [32])
@patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3"})