From 24d4dad7b220d06111d8cd7017e665f639b22f15 Mon Sep 17 00:00:00 2001
From: wangxiyuan <wangxiyuan1007@gmail.com>
Date: Wed, 3 Sep 2025 08:57:43 +0800
Subject: [PATCH] [CI] Enable MTP torchair e2e test (#2705)

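Enable the MTP torchair e2e test in CI by running
tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
in the v1 spec decode step of the vllm_ascend_test workflow (replacing
the stale TODO comment there).

Also remove the explicit clear_ascend_config() calls (and the import)
from the torchair test, and drop the trailing `del spec_llm` statements
from both MTP correctness tests.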

- vLLM version: v0.10.1.1
- vLLM main:
https://github.com/vllm-project/vllm/commit/ce30dca5c44353f278dc114bd6f03b11700088eb

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
---
 .github/workflows/vllm_ascend_test.yaml                      | 2 +-
 .../e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py | 1 -
 .../spec_decode_v1/test_v1_mtp_torchair_correctness.py       | 5 -----
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index e2b5fb2..1fc10dc 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -209,7 +209,7 @@ jobs:
 
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
           pytest -sv tests/e2e/singlecard/ops/
diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
index 71d274c..0c01a07 100644
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -74,4 +74,3 @@ def test_mtp_correctness(
     # Heuristic: expect at least 66% of the prompts to match exactly
     # Upon failure, inspect the outputs to check for inaccuracy.
     assert matches > int(0.66 * len(ref_outputs))
-    del spec_llm
diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
index 16825f0..1bf6fea 100644
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
@@ -6,7 +6,6 @@ import pytest
 from vllm import SamplingParams
 
 from tests.e2e.conftest import VllmRunner
-from vllm_ascend.ascend_config import clear_ascend_config
 
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 
@@ -35,7 +34,6 @@ def test_mtp_torchair_correctness(
     Compare the outputs of a original LLM and a speculative LLM
     should be the same when using mtp speculative decoding.
     '''
-    clear_ascend_config()
     with VllmRunner(model_name,
                     tensor_parallel_size=1,
                     gpu_memory_utilization=0.7,
@@ -49,7 +47,6 @@ def test_mtp_torchair_correctness(
                         },
                     }) as ref_llm:
         ref_outputs = ref_llm.generate(example_prompts, sampling_config)
-    clear_ascend_config()
     with VllmRunner(model_name,
                     tensor_parallel_size=1,
                     max_num_seqs=256,
@@ -86,5 +83,3 @@ def test_mtp_torchair_correctness(
     # Heuristic: expect at least 66% of the prompts to match exactly
     # Upon failure, inspect the outputs to check for inaccuracy.
     assert matches > int(0.66 * len(ref_outputs))
-    del spec_llm
-    clear_ascend_config()