Sync from v0.13
This commit is contained in:
93
tests/entrypoints/openai/test_shutdown.py
Normal file
93
tests/entrypoints/openai/test_shutdown.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import asyncio
import signal
import subprocess
import sys
import time

import openai
import pytest

from vllm.utils.network_utils import get_open_port
|
||||
|
||||
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_shutdown_on_engine_failure():
    """Verify that API calls fail once the server process has been terminated.

    Starts a vLLM OpenAI-compatible API server in a subprocess, polls it with
    completion requests until one succeeds, terminates the process to simulate
    a crash, and then checks that a subsequent completion request raises
    ``openai.APIConnectionError`` or ``openai.APIStatusError``.

    The server process, its output pipes and the HTTP client are always
    released in a ``finally`` block, so a failing assertion does not leave a
    stray server running.
    """
    port = get_open_port()

    # NOTE(review): stdout/stderr are piped but only drained if the server
    # dies during startup; a very chatty server could fill the pipe buffer
    # and stall. Confirm the log volume is small enough for this model.
    proc = subprocess.Popen(
        [
            # dtype, max-len etc set so that this can run in CI
            sys.executable,
            "-m",
            "vllm.entrypoints.openai.api_server",
            "--model",
            MODEL_NAME,
            "--dtype",
            "bfloat16",
            "--max-model-len",
            "128",
            "--enforce-eager",
            "--port",
            str(port),
            "--gpu-memory-utilization",
            "0.05",
            "--max-num-seqs",
            "2",
            "--disable-frontend-multiprocessing",
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # Runs in the child before exec: the server ignores SIGINT.
        # Presumably so an interrupt aimed at the test runner does not also
        # stop the server -- TODO confirm.
        preexec_fn=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN),
    )

    # No retries and a short timeout so each readiness probe fails fast.
    client = openai.AsyncOpenAI(
        base_url=f"http://localhost:{port}/v1",
        api_key="dummy",
        max_retries=0,
        timeout=10,
    )

    try:
        # Poll until the server answers a request. A monotonic clock keeps
        # the 30 second budget immune to wall-clock adjustments.
        deadline = time.monotonic() + 30
        while time.monotonic() < deadline:
            try:
                await client.completions.create(
                    model=MODEL_NAME, prompt="Hello", max_tokens=1
                )
                break
            except Exception:
                # Yield to the event loop instead of blocking it.
                await asyncio.sleep(0.5)
                if proc.poll() is not None:
                    stdout, stderr = proc.communicate(timeout=1)
                    pytest.fail(
                        f"Server died during startup. stdout: {stdout}, stderr: {stderr}"
                    )
        else:
            # The loop ran out of time without a successful request; the
            # finally block below takes care of stopping the server.
            pytest.fail("Server failed to start in 30 seconds")

        # Kill server to simulate crash
        proc.terminate()
        await asyncio.sleep(1)

        # Verify API calls now fail
        with pytest.raises((openai.APIConnectionError, openai.APIStatusError)):
            await client.completions.create(
                model=MODEL_NAME, prompt="This should fail", max_tokens=1
            )

        # wait() raises TimeoutExpired if the server has not exited within
        # 5 seconds, so reaching the assert already implies it has exited.
        return_code = proc.wait(timeout=5)
        assert return_code is not None
    finally:
        # Make sure the server is gone: ask politely first, then force it.
        if proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
        # Release the pipe file descriptors opened by Popen.
        for pipe in (proc.stdout, proc.stderr):
            if pipe is not None:
                pipe.close()
        # Release the client's underlying HTTP connections.
        await client.close()
|
||||
Reference in New Issue
Block a user