[CI] Enable MTP torchair e2e test (#2705)
enable MTP torchair e2e test
- vLLM version: v0.10.1.1
- vLLM main:
ce30dca5c4
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
2
.github/workflows/vllm_ascend_test.yaml
vendored
2
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -209,7 +209,7 @@ jobs:
|
||||
|
||||
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
||||
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
|
||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
|
||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
||||
|
||||
pytest -sv tests/e2e/singlecard/ops/
|
||||
|
||||
@@ -74,4 +74,3 @@ def test_mtp_correctness(
|
||||
# Heuristic: expect at least 66% of the prompts to match exactly
|
||||
# Upon failure, inspect the outputs to check for inaccuracy.
|
||||
assert matches > int(0.66 * len(ref_outputs))
|
||||
del spec_llm
|
||||
|
||||
@@ -6,7 +6,6 @@ import pytest
|
||||
from vllm import SamplingParams
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
from vllm_ascend.ascend_config import clear_ascend_config
|
||||
|
||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||
|
||||
@@ -35,7 +34,6 @@ def test_mtp_torchair_correctness(
|
||||
Compare the outputs of a original LLM and a speculative LLM
|
||||
should be the same when using mtp speculative decoding.
|
||||
'''
|
||||
clear_ascend_config()
|
||||
with VllmRunner(model_name,
|
||||
tensor_parallel_size=1,
|
||||
gpu_memory_utilization=0.7,
|
||||
@@ -49,7 +47,6 @@ def test_mtp_torchair_correctness(
|
||||
},
|
||||
}) as ref_llm:
|
||||
ref_outputs = ref_llm.generate(example_prompts, sampling_config)
|
||||
clear_ascend_config()
|
||||
with VllmRunner(model_name,
|
||||
tensor_parallel_size=1,
|
||||
max_num_seqs=256,
|
||||
@@ -86,5 +83,3 @@ def test_mtp_torchair_correctness(
|
||||
# Heuristic: expect at least 66% of the prompts to match exactly
|
||||
# Upon failure, inspect the outputs to check for inaccuracy.
|
||||
assert matches > int(0.66 * len(ref_outputs))
|
||||
del spec_llm
|
||||
clear_ascend_config()
|
||||
|
||||
Reference in New Issue
Block a user