diff --git a/tests/e2e/singlecard/test_aclgraph.py b/tests/e2e/singlecard/test_aclgraph.py index 86ce4795..17d1f4a4 100644 --- a/tests/e2e/singlecard/test_aclgraph.py +++ b/tests/e2e/singlecard/test_aclgraph.py @@ -21,8 +21,6 @@ Run `pytest tests/compile/test_aclgraph.py`. """ import os -import random -import string import pytest from vllm import SamplingParams @@ -101,7 +99,7 @@ def test_models_with_aclgraph( @pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("max_tokens", [5]) +@pytest.mark.parametrize("max_tokens", [32]) def test_models_with_aclgraph_full_decode_only( model: str, max_tokens: int, @@ -110,7 +108,6 @@ def test_models_with_aclgraph_full_decode_only( del os.environ['HCCL_OP_EXPANSION_MODE'] # NOTE: Randomly fill the prompt with the requested amount for # the specified capture shape to prevent accuracy issues caused by padding - random_number = random.choice(list(range(6, 47, 8))) prompts = [ ('Solve the following math problem step by step.' 'The last line of your response should be of the form Answer: ' @@ -136,12 +133,20 @@ def test_models_with_aclgraph_full_decode_only( 'and $x^2 + bx + c = 0$ have a common real root, and the equations $x^2 + x + a = 0$' 'and $x^2 + cx + b = 0$ also have a common real root.' 'Compute the sum $a + b + c$.') - ] + [ - ''.join(random.choices(string.ascii_lowercase, k=random.randint( - 1, 25))) for _ in range(random_number) + ] + vllm_aclgraph_qwen_answers = [ + ' \n\nTo solve this problem, we need to use the Law of Sines and Law of Cosines. Let me start by drawing triangle $ABC$ with the', + " \n\nTo solve this problem, we can use the fact that the expected value of the area of a triangle formed by two random points on a square's perimeter is", + 'i$.\n\nLet $a, b, c$ be distinct numbers such that the equations $x^2 + ax + 1 = 0$ and' ] - sampling_params = SamplingParams(max_tokens=5, + vllm_aclgraph_ds_answers = [ + '\n\nSelect an assignment template', + '\n\nSelect an assignment template', + '\n\nSelect an assignment template' + ] + + sampling_params = SamplingParams(max_tokens=max_tokens, n=1, temperature=0.0, top_p=1.0, @@ -157,14 +162,6 @@ def test_models_with_aclgraph_full_decode_only( vllm_aclgraph_outputs = runner.model.generate( prompts, sampling_params) - with VllmRunner( - model, - max_model_len=1024, - enforce_eager=True, - quantization="ascend", - ) as runner: - vllm_eager_outputs = runner.model.generate(prompts, - sampling_params) else: with VllmRunner( model, @@ -178,25 +175,16 @@ def test_models_with_aclgraph_full_decode_only( vllm_aclgraph_outputs = runner.model.generate( prompts, sampling_params) - with VllmRunner( - model, - max_model_len=1024, - compilation_config={ - "cudagraph_capture_sizes": [4, 8, 32, 64], - }, - ) as runner: - vllm_eager_outputs = runner.model.generate(prompts, - sampling_params) - vllm_aclgraph_outputs_list = [] for output in vllm_aclgraph_outputs: vllm_aclgraph_outputs_list.append( - (output.outputs[0].index, output.outputs[0].text)) - + ([output.outputs[0].index], output.outputs[0].text)) vllm_eager_outputs_list = [] - for output in vllm_eager_outputs: - vllm_eager_outputs_list.append( - (output.outputs[0].index, output.outputs[0].text)) + vllm_eager_outputs_list = ([ + ([0], answer) for answer in vllm_aclgraph_ds_answers + ] if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8" else [ + ([0], answer) for answer in vllm_aclgraph_qwen_answers + ]) check_outputs_equal( outputs_0_lst=vllm_eager_outputs_list,