[CI] Enable MTP torchair e2e test (#2705)
enable MTP torchair e2e test
- vLLM version: v0.10.1.1
- vLLM main: ce30dca5c4
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
2
.github/workflows/vllm_ascend_test.yaml
vendored
2
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -209,7 +209,7 @@ jobs:
|
|||||||
|
|
||||||
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
||||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
||||||
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
|
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
|
||||||
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
||||||
|
|
||||||
pytest -sv tests/e2e/singlecard/ops/
|
pytest -sv tests/e2e/singlecard/ops/
|
||||||
|
|||||||
@@ -74,4 +74,3 @@ def test_mtp_correctness(
|
|||||||
# Heuristic: expect at least 66% of the prompts to match exactly
|
# Heuristic: expect at least 66% of the prompts to match exactly
|
||||||
# Upon failure, inspect the outputs to check for inaccuracy.
|
# Upon failure, inspect the outputs to check for inaccuracy.
|
||||||
assert matches > int(0.66 * len(ref_outputs))
|
assert matches > int(0.66 * len(ref_outputs))
|
||||||
del spec_llm
|
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import pytest
|
|||||||
from vllm import SamplingParams
|
from vllm import SamplingParams
|
||||||
|
|
||||||
from tests.e2e.conftest import VllmRunner
|
from tests.e2e.conftest import VllmRunner
|
||||||
from vllm_ascend.ascend_config import clear_ascend_config
|
|
||||||
|
|
||||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||||
|
|
||||||
@@ -35,7 +34,6 @@ def test_mtp_torchair_correctness(
|
|||||||
Compare the outputs of a original LLM and a speculative LLM
|
Compare the outputs of a original LLM and a speculative LLM
|
||||||
should be the same when using mtp speculative decoding.
|
should be the same when using mtp speculative decoding.
|
||||||
'''
|
'''
|
||||||
clear_ascend_config()
|
|
||||||
with VllmRunner(model_name,
|
with VllmRunner(model_name,
|
||||||
tensor_parallel_size=1,
|
tensor_parallel_size=1,
|
||||||
gpu_memory_utilization=0.7,
|
gpu_memory_utilization=0.7,
|
||||||
@@ -49,7 +47,6 @@ def test_mtp_torchair_correctness(
|
|||||||
},
|
},
|
||||||
}) as ref_llm:
|
}) as ref_llm:
|
||||||
ref_outputs = ref_llm.generate(example_prompts, sampling_config)
|
ref_outputs = ref_llm.generate(example_prompts, sampling_config)
|
||||||
clear_ascend_config()
|
|
||||||
with VllmRunner(model_name,
|
with VllmRunner(model_name,
|
||||||
tensor_parallel_size=1,
|
tensor_parallel_size=1,
|
||||||
max_num_seqs=256,
|
max_num_seqs=256,
|
||||||
@@ -86,5 +83,3 @@ def test_mtp_torchair_correctness(
|
|||||||
# Heuristic: expect at least 66% of the prompts to match exactly
|
# Heuristic: expect at least 66% of the prompts to match exactly
|
||||||
# Upon failure, inspect the outputs to check for inaccuracy.
|
# Upon failure, inspect the outputs to check for inaccuracy.
|
||||||
assert matches > int(0.66 * len(ref_outputs))
|
assert matches > int(0.66 * len(ref_outputs))
|
||||||
del spec_llm
|
|
||||||
clear_ascend_config()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user