[CI] Enable MTP torchair e2e test (#2705)

Enable the MTP torchair e2e test in CI. - vLLM version: v0.10.1.1 - vLLM main: ce30dca5c4 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-09-03 08:57:43 +08:00
parent af62af3cc5
commit 24d4dad7b2
3 changed files with 1 addition and 7 deletions
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -209,7 +209,7 @@ jobs:
          # ------------------------------------ v1 spec decode test ------------------------------------ #
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
          pytest -sv tests/e2e/singlecard/ops/
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -74,4 +74,3 @@ def test_mtp_correctness(
    # Heuristic: expect at least 66% of the prompts to match exactly
    # Upon failure, inspect the outputs to check for inaccuracy.
    assert matches > int(0.66 * len(ref_outputs))
    del spec_llm
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
@@ -6,7 +6,6 @@ import pytest
 from vllm import SamplingParams
 from tests.e2e.conftest import VllmRunner
 from vllm_ascend.ascend_config import clear_ascend_config
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
@@ -35,7 +34,6 @@ def test_mtp_torchair_correctness(
    Compare the outputs of a original LLM and a speculative LLM
    should be the same when using mtp speculative decoding.
    '''
    clear_ascend_config()
    with VllmRunner(model_name,
                    tensor_parallel_size=1,
                    gpu_memory_utilization=0.7,
@@ -49,7 +47,6 @@ def test_mtp_torchair_correctness(
                        },
                    }) as ref_llm:
        ref_outputs = ref_llm.generate(example_prompts, sampling_config)
    clear_ascend_config()
    with VllmRunner(model_name,
                    tensor_parallel_size=1,
                    max_num_seqs=256,
@@ -86,5 +83,3 @@ def test_mtp_torchair_correctness(
    # Heuristic: expect at least 66% of the prompts to match exactly
    # Upon failure, inspect the outputs to check for inaccuracy.
    assert matches > int(0.66 * len(ref_outputs))
    del spec_llm
    clear_ascend_config()