sglang v0.5.2 & support Qwen3-Next-80B-A3B-Instruct
This commit is contained in:
191
sgl-router/py_test/integration/test_circuit_breaker.py
Normal file
191
sgl-router/py_test/integration/test_circuit_breaker.py
Normal file
@@ -0,0 +1,191 @@
|
||||
import time
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_circuit_breaker_opens_and_recovers(router_manager, mock_workers):
    """Expect a 503 from the router after worker failures, then 200s after a pause.

    A single mock worker is launched with ``--fail-first-n 3`` (it fails its
    first three requests, then succeeds) behind a router whose retries are
    disabled. The test sends up to eight completions until one comes back
    as 503, sleeps four seconds, and then requires two consecutive 200s.
    """
    # mock_workers yields a 3-tuple; the middle element is the list of URLs.
    _, [wurl], _ = mock_workers(n=1, args=["--fail-first-n", "3"])

    breaker_settings = {
        "cb_failure_threshold": 3,
        "cb_success_threshold": 2,
        "cb_timeout_duration_secs": 3,
        "cb_window_duration_secs": 10,
        "disable_retries": True,
    }
    rh = router_manager.start_router(
        worker_urls=[wurl],
        policy="round_robin",
        extra=breaker_settings,
    )

    def post_once():
        """Send one non-streaming completion request through the router."""
        payload = {
            "model": "test-model",
            "prompt": "trigger",
            "max_tokens": 1,
            "stream": False,
        }
        return requests.post(f"{rh.url}/v1/completions", json=payload, timeout=3)

    # any() short-circuits, so requests stop at the first 503 (max 8 sent).
    saw_503 = any(post_once().status_code == 503 for _ in range(8))
    assert saw_503, "circuit breaker did not open to return 503"

    # Wait longer than cb_timeout_duration_secs (3) before probing again;
    # presumably this lets the breaker leave the open state.
    time.sleep(4)
    first_after_wait = post_once()
    second_after_wait = post_once()
    assert first_after_wait.status_code == 200 and second_after_wait.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_circuit_breaker_half_open_failure_reopens(router_manager, mock_workers):
    """Expect 503, then after a pause one 500 followed immediately by 503.

    The only worker is launched with ``--status-code 500`` (always fails)
    and router retries are disabled. After a 503 has been observed the test
    sleeps three seconds; the next request must return the worker's 500 and
    the one after it must be 503 again.
    """
    _, [wurl], _ = mock_workers(n=1, args=["--status-code", "500"])  # always fail

    breaker_settings = {
        "cb_failure_threshold": 2,
        "cb_success_threshold": 2,
        "cb_timeout_duration_secs": 2,
        "cb_window_duration_secs": 5,
        "disable_retries": True,
    }
    rh = router_manager.start_router(
        worker_urls=[wurl],
        policy="round_robin",
        extra=breaker_settings,
    )

    def post_once():
        """Send one non-streaming completion request through the router."""
        payload = {
            "model": "test-model",
            "prompt": "x",
            "max_tokens": 1,
            "stream": False,
        }
        return requests.post(f"{rh.url}/v1/completions", json=payload, timeout=3)

    # Stop sending as soon as a 503 shows up (at most 8 attempts).
    opened = any(post_once().status_code == 503 for _ in range(8))
    assert opened, "circuit breaker did not open"

    # Sleep past cb_timeout_duration_secs (2) so the next request is
    # presumably forwarded to the worker as a trial call.
    time.sleep(3)
    trial = post_once()
    assert trial.status_code == 500
    follow_up = post_once()
    assert follow_up.status_code == 503
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_circuit_breaker_disable_flag(router_manager, mock_workers):
    """With ``disable_circuit_breaker`` set, a request surfaces the worker's 500.

    The single worker is launched with ``--status-code 500`` and the router
    is started with both the circuit breaker and retries disabled; one
    completion request is sent and its status must be 500.
    """
    _, [wurl], _ = mock_workers(n=1, args=["--status-code", "500"])  # always fail

    router_flags = {
        "disable_circuit_breaker": True,
        "disable_retries": True,
    }
    rh = router_manager.start_router(
        worker_urls=[wurl],
        policy="round_robin",
        extra=router_flags,
    )

    payload = {
        "model": "test-model",
        "prompt": "x",
        "max_tokens": 1,
        "stream": False,
    }
    response = requests.post(f"{rh.url}/v1/completions", json=payload, timeout=3)
    assert response.status_code == 500
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_circuit_breaker_per_worker_isolation(router_manager, mock_workers):
    """After two 500s from the failing worker, every later response must be 200.

    Two workers sit behind a round-robin router with retries disabled: one
    launched with ``--status-code 500`` and one with default arguments. The
    loop counts 500 responses; once two have been seen it sends two extra
    requests whose results are ignored and marks the breaker as opened.
    From then on any non-200 response fails the test, and at least five
    200s must be collected within the 30 iterations.
    """
    _, [fail_url], _ = mock_workers(n=1, args=["--status-code", "500"])  # always fail
    _, [ok_url], _ = mock_workers(n=1)

    breaker_settings = {
        "cb_failure_threshold": 2,
        "cb_success_threshold": 1,
        "cb_timeout_duration_secs": 2,
        "cb_window_duration_secs": 10,
        "disable_retries": True,
    }
    rh = router_manager.start_router(
        worker_urls=[fail_url, ok_url],
        policy="round_robin",
        extra=breaker_settings,
    )

    def post_once():
        """Send one non-streaming completion request through the router."""
        payload = {
            "model": "test-model",
            "prompt": "y",
            "max_tokens": 1,
            "stream": False,
        }
        return requests.post(f"{rh.url}/v1/completions", json=payload, timeout=3)

    opened = False
    failures = 0
    successes_after_open = 0
    for _ in range(30):
        r = post_once()

        if opened:
            # Phase 2: only 200 responses are acceptable from here on.
            if r.status_code != 200:
                assert False, f"Unexpected non-200 after CB open: {r.status_code}"
            else:
                successes_after_open += 1
            continue

        # Phase 1: count 500s; other statuses are ignored.
        if r.status_code != 500:
            continue
        failures += 1
        if failures >= 2:
            # Two extra requests with discarded results before phase 2 starts.
            _ = post_once()
            _ = post_once()
            opened = True

    assert opened and successes_after_open >= 5
|
||||
|
||||
|
||||
@pytest.mark.integration
def test_circuit_breaker_with_retries(router_manager, mock_workers):
    """With retries configured, a single request must come back as 200.

    One worker is launched with ``--status-code 500`` and one with default
    arguments, behind a round-robin router that has retry and circuit
    breaker settings supplied. Exactly one completion request is sent.
    """
    _, [fail_url], _ = mock_workers(n=1, args=["--status-code", "500"])  # always fail
    _, [ok_url], _ = mock_workers(n=1)

    retry_and_breaker_settings = {
        "retry_max_retries": 3,
        "retry_initial_backoff_ms": 10,
        "retry_max_backoff_ms": 50,
        "cb_failure_threshold": 2,
        "cb_success_threshold": 1,
        "cb_timeout_duration_secs": 2,
        "cb_window_duration_secs": 10,
    }
    rh = router_manager.start_router(
        worker_urls=[fail_url, ok_url],
        policy="round_robin",
        extra=retry_and_breaker_settings,
    )

    payload = {
        "model": "test-model",
        "prompt": "z",
        "max_tokens": 1,
        "stream": False,
    }
    response = requests.post(f"{rh.url}/v1/completions", json=payload, timeout=5)
    assert response.status_code == 200
|
||||
Reference in New Issue
Block a user