Sync from v0.13
This commit is contained in:
60
tests/entrypoints/sagemaker/conftest.py
Normal file
60
tests/entrypoints/sagemaker/conftest.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""Shared fixtures and utilities for SageMaker tests."""
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
# Model name constants used across tests
|
||||
MODEL_NAME_SMOLLM = "HuggingFaceTB/SmolLM2-135M-Instruct"
|
||||
LORA_ADAPTER_NAME_SMOLLM = "jekunz/smollm-135m-lora-fineweb-faroese"
|
||||
|
||||
# SageMaker header constants
|
||||
HEADER_SAGEMAKER_CLOSED_SESSION_ID = "X-Amzn-SageMaker-Closed-Session-Id"
|
||||
HEADER_SAGEMAKER_SESSION_ID = "X-Amzn-SageMaker-Session-Id"
|
||||
HEADER_SAGEMAKER_NEW_SESSION_ID = "X-Amzn-SageMaker-New-Session-Id"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def smollm2_lora_files():
|
||||
"""Download LoRA files once per test session."""
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
return snapshot_download(repo_id=LORA_ADAPTER_NAME_SMOLLM)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def basic_server_with_lora(smollm2_lora_files):
|
||||
"""Basic server fixture with standard configuration."""
|
||||
args = [
|
||||
"--dtype",
|
||||
"bfloat16",
|
||||
"--max-model-len",
|
||||
"8192",
|
||||
"--enforce-eager",
|
||||
# lora config below
|
||||
"--enable-lora",
|
||||
"--max-lora-rank",
|
||||
"256",
|
||||
"--max-cpu-loras",
|
||||
"2",
|
||||
"--max-num-seqs",
|
||||
"64",
|
||||
]
|
||||
|
||||
envs = {
|
||||
"VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True",
|
||||
"SAGEMAKER_ENABLE_STATEFUL_SESSIONS": "True",
|
||||
}
|
||||
with RemoteOpenAIServer(MODEL_NAME_SMOLLM, args, env_dict=envs) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def async_client(basic_server_with_lora: RemoteOpenAIServer):
|
||||
"""Async OpenAI client fixture for use with basic_server."""
|
||||
async with basic_server_with_lora.get_async_client() as async_client:
|
||||
yield async_client
|
||||
Reference in New Issue
Block a user