Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions
--- a/tests/entrypoints/sagemaker/conftest.py
+++ b/tests/entrypoints/sagemaker/conftest.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Shared fixtures and utilities for SageMaker tests."""
+
+import pytest
+import pytest_asyncio
+
+from ...utils import RemoteOpenAIServer
+
+# Model name constants used across tests
+MODEL_NAME_SMOLLM = "HuggingFaceTB/SmolLM2-135M-Instruct"
+LORA_ADAPTER_NAME_SMOLLM = "jekunz/smollm-135m-lora-fineweb-faroese"
+
+# SageMaker header constants
+HEADER_SAGEMAKER_CLOSED_SESSION_ID = "X-Amzn-SageMaker-Closed-Session-Id"
+HEADER_SAGEMAKER_SESSION_ID = "X-Amzn-SageMaker-Session-Id"
+HEADER_SAGEMAKER_NEW_SESSION_ID = "X-Amzn-SageMaker-New-Session-Id"
+
+
+@pytest.fixture(scope="session")
+def smollm2_lora_files():
+    """Download LoRA files once per test session."""
+    from huggingface_hub import snapshot_download
+
+    return snapshot_download(repo_id=LORA_ADAPTER_NAME_SMOLLM)
+
+
+@pytest.fixture(scope="module")
+def basic_server_with_lora(smollm2_lora_files):
+    """Basic server fixture with standard configuration."""
+    args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "8192",
+        "--enforce-eager",
+        # lora config below
+        "--enable-lora",
+        "--max-lora-rank",
+        "256",
+        "--max-cpu-loras",
+        "2",
+        "--max-num-seqs",
+        "64",
+    ]
+
+    envs = {
+        "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True",
+        "SAGEMAKER_ENABLE_STATEFUL_SESSIONS": "True",
+    }
+    with RemoteOpenAIServer(MODEL_NAME_SMOLLM, args, env_dict=envs) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def async_client(basic_server_with_lora: RemoteOpenAIServer):
+    """Async OpenAI client fixture for use with basic_server."""
+    async with basic_server_with_lora.get_async_client() as async_client:
+        yield async_client