Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions
--- a/tests/tpu/test_custom_dispatcher.py
+++ b/tests/tpu/test_custom_dispatcher.py
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.config import CompilationMode
+
+from ..utils import compare_two_settings
+
+# On TPU, --enforce-eager still causes graph compilation, which takes long
+# enough to time out the default health check in the MQLLMEngine,
+# so we set the timeout here to 30s.
+
+
+def test_custom_dispatcher(monkeypatch: pytest.MonkeyPatch):
+    with monkeypatch.context() as m:  # scope the env override to this block
+        m.setenv("VLLM_RPC_TIMEOUT", "30000")  # presumably milliseconds, i.e. 30 s
+        compare_two_settings(  # NOTE(review): imported from ..utils, not shown here
+            "Qwen/Qwen2.5-1.5B-Instruct",
+            arg1=[  # identical to arg2 except for the -O value
+                "--max-model-len=256",
+                "--max-num-seqs=32",
+                "--enforce-eager",
+                f"-O{CompilationMode.DYNAMO_TRACE_ONCE}",
+            ],
+            arg2=[  # identical to arg1 except for the -O value
+                "--max-model-len=256",
+                "--max-num-seqs=32",
+                "--enforce-eager",
+                f"-O{CompilationMode.STOCK_TORCH_COMPILE}",
+            ],
+            env1={},  # empty for both; presumably means no per-setting env vars
+            env2={},
+        )