Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions
--- a/tests/v1/engine/test_init_error_messaging.py
+++ b/tests/v1/engine/test_init_error_messaging.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.v1.core.kv_cache_utils import check_enough_kv_cache_memory
+from vllm.v1.kv_cache_interface import FullAttentionSpec
+
+
+def test_kv_cache_oom_no_memory():
+    from unittest.mock import MagicMock
+
+    config = MagicMock()
+    config.model_config.max_model_len = 2048
+
+    spec = {
+        "layer_0": FullAttentionSpec(
+            block_size=16,
+            num_kv_heads=8,
+            head_size=128,
+            dtype="float16",
+        )
+    }
+
+    with pytest.raises(ValueError):
+        check_enough_kv_cache_memory(config, spec, 0)
+
+
+def test_kv_cache_oom_insufficient_memory(monkeypatch):
+    from unittest.mock import MagicMock
+
+    config = MagicMock()
+    config.model_config.max_model_len = 2048
+    config.cache_config.block_size = 16
+    config.parallel_config.tensor_parallel_size = 1
+    config.parallel_config.pipeline_parallel_size = 1
+    config.parallel_config.decode_context_parallel_size = 1
+
+    monkeypatch.setattr(
+        "vllm.v1.core.kv_cache_utils.max_memory_usage_bytes",
+        lambda c, s: 100 * 1024**3,  # 100 GiB
+    )
+
+    spec = {
+        "layer_0": FullAttentionSpec(
+            block_size=16,
+            num_kv_heads=8,
+            head_size=128,
+            dtype="float16",
+        )
+    }
+
+    with pytest.raises(ValueError):
+        check_enough_kv_cache_memory(config, spec, 1024**3)  # 1 GiB