Sync from v0.13
This commit is contained in:
42
tests/distributed/test_pp_cudagraph.py
Normal file
42
tests/distributed/test_pp_cudagraph.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import pytest
|
||||
from typing_extensions import LiteralString
|
||||
|
||||
from ..utils import compare_two_settings, create_new_process_for_each_test
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"PP_SIZE, MODEL_NAME",
|
||||
[
|
||||
(2, "JackFram/llama-160m"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"ATTN_BACKEND",
|
||||
[
|
||||
"FLASH_ATTN",
|
||||
],
|
||||
)
|
||||
@create_new_process_for_each_test()
|
||||
def test_pp_cudagraph(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
PP_SIZE: int,
|
||||
MODEL_NAME: str,
|
||||
ATTN_BACKEND: LiteralString,
|
||||
):
|
||||
with monkeypatch.context() as m:
|
||||
cudagraph_args = [
|
||||
# use half precision for speed and memory savings in CI environment
|
||||
"--dtype",
|
||||
"float16",
|
||||
"--pipeline-parallel-size",
|
||||
str(PP_SIZE),
|
||||
"--distributed-executor-backend",
|
||||
"mp",
|
||||
]
|
||||
m.setenv("VLLM_ATTENTION_BACKEND", ATTN_BACKEND)
|
||||
|
||||
eager_args = cudagraph_args + ["--enforce-eager"]
|
||||
|
||||
compare_two_settings(MODEL_NAME, eager_args, cudagraph_args)
|
||||
Reference in New Issue
Block a user