From 24d4dad7b220d06111d8cd7017e665f639b22f15 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Wed, 3 Sep 2025 08:57:43 +0800 Subject: [PATCH] [CI] Enable MTP torchair e2e test (#2705) enable MTP torchair e2e test - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/ce30dca5c44353f278dc114bd6f03b11700088eb Signed-off-by: wangxiyuan --- .github/workflows/vllm_ascend_test.yaml | 2 +- .../e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py | 1 - .../spec_decode_v1/test_v1_mtp_torchair_correctness.py | 5 ----- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index e2b5fb2..1fc10dc 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -209,7 +209,7 @@ jobs: # ------------------------------------ v1 spec decode test ------------------------------------ # pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py - # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed + pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py pytest -sv tests/e2e/singlecard/ops/ diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py index 71d274c..0c01a07 100644 --- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py @@ -74,4 +74,3 @@ def test_mtp_correctness( # Heuristic: expect at least 66% of the prompts to match exactly # Upon failure, inspect the outputs to check for inaccuracy. assert matches > int(0.66 * len(ref_outputs)) - del spec_llm diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py index 16825f0..1bf6fea 100644 --- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py @@ -6,7 +6,6 @@ import pytest from vllm import SamplingParams from tests.e2e.conftest import VllmRunner -from vllm_ascend.ascend_config import clear_ascend_config os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -35,7 +34,6 @@ def test_mtp_torchair_correctness( Compare the outputs of a original LLM and a speculative LLM should be the same when using mtp speculative decoding. ''' - clear_ascend_config() with VllmRunner(model_name, tensor_parallel_size=1, gpu_memory_utilization=0.7, @@ -49,7 +47,6 @@ def test_mtp_torchair_correctness( }, }) as ref_llm: ref_outputs = ref_llm.generate(example_prompts, sampling_config) - clear_ascend_config() with VllmRunner(model_name, tensor_parallel_size=1, max_num_seqs=256, @@ -86,5 +83,3 @@ def test_mtp_torchair_correctness( # Heuristic: expect at least 66% of the prompts to match exactly # Upon failure, inspect the outputs to check for inaccuracy. assert matches > int(0.66 * len(ref_outputs)) - del spec_llm - clear_ascend_config()