[CI] Enable MTP torchair e2e test (#2705)

Enable the MTP torchair e2e test.

- vLLM version: v0.10.1.1
- vLLM main: ce30dca5c4

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-09-03 08:57:43 +08:00
committed by GitHub
parent af62af3cc5
commit 24d4dad7b2
3 changed files with 1 addition and 7 deletions

View File

@@ -209,7 +209,7 @@ jobs:
# ------------------------------------ v1 spec decode test ------------------------------------ #
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
pytest -sv tests/e2e/singlecard/ops/

View File

@@ -74,4 +74,3 @@ def test_mtp_correctness(
# Heuristic: expect at least 66% of the prompts to match exactly
# Upon failure, inspect the outputs to check for inaccuracy.
assert matches > int(0.66 * len(ref_outputs))
del spec_llm

View File

@@ -6,7 +6,6 @@ import pytest
from vllm import SamplingParams
from tests.e2e.conftest import VllmRunner
from vllm_ascend.ascend_config import clear_ascend_config
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
@@ -35,7 +34,6 @@ def test_mtp_torchair_correctness(
Compare the outputs of a original LLM and a speculative LLM
should be the same when using mtp speculative decoding.
'''
clear_ascend_config()
with VllmRunner(model_name,
tensor_parallel_size=1,
gpu_memory_utilization=0.7,
@@ -49,7 +47,6 @@ def test_mtp_torchair_correctness(
},
}) as ref_llm:
ref_outputs = ref_llm.generate(example_prompts, sampling_config)
clear_ascend_config()
with VllmRunner(model_name,
tensor_parallel_size=1,
max_num_seqs=256,
@@ -86,5 +83,3 @@ def test_mtp_torchair_correctness(
# Heuristic: expect at least 66% of the prompts to match exactly
# Upon failure, inspect the outputs to check for inaccuracy.
assert matches > int(0.66 * len(ref_outputs))
del spec_llm
clear_ascend_config()