Sync from v0.13
This commit is contained in:
54
tests/v1/engine/test_init_error_messaging.py
Normal file
54
tests/v1/engine/test_init_error_messaging.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.v1.core.kv_cache_utils import check_enough_kv_cache_memory
|
||||
from vllm.v1.kv_cache_interface import FullAttentionSpec
|
||||
|
||||
|
||||
def test_kv_cache_oom_no_memory():
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
config = MagicMock()
|
||||
config.model_config.max_model_len = 2048
|
||||
|
||||
spec = {
|
||||
"layer_0": FullAttentionSpec(
|
||||
block_size=16,
|
||||
num_kv_heads=8,
|
||||
head_size=128,
|
||||
dtype="float16",
|
||||
)
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
check_enough_kv_cache_memory(config, spec, 0)
|
||||
|
||||
|
||||
def test_kv_cache_oom_insufficient_memory(monkeypatch):
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
config = MagicMock()
|
||||
config.model_config.max_model_len = 2048
|
||||
config.cache_config.block_size = 16
|
||||
config.parallel_config.tensor_parallel_size = 1
|
||||
config.parallel_config.pipeline_parallel_size = 1
|
||||
config.parallel_config.decode_context_parallel_size = 1
|
||||
|
||||
monkeypatch.setattr(
|
||||
"vllm.v1.core.kv_cache_utils.max_memory_usage_bytes",
|
||||
lambda c, s: 100 * 1024**3, # 100 GiB
|
||||
)
|
||||
|
||||
spec = {
|
||||
"layer_0": FullAttentionSpec(
|
||||
block_size=16,
|
||||
num_kv_heads=8,
|
||||
head_size=128,
|
||||
dtype="float16",
|
||||
)
|
||||
}
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
check_enough_kv_cache_memory(config, spec, 1024**3) # 1 GiB
|
||||
Reference in New Issue
Block a user