34 lines
1001 B
Python
34 lines
1001 B
Python
import pytest
|
|
import requests
|
|
|
|
|
|
_MIB = 1024 * 1024

# Upper bound on every HTTP call: requests applies no timeout by default, so
# without this a stuck router or worker would hang the test run instead of
# failing it.
_REQUEST_TIMEOUT_S = 30


def _post_completion(base_url, prompt_chars):
    """POST a non-streaming completion request with a prompt of ``prompt_chars`` characters.

    Args:
        base_url: Base URL of the router under test (no trailing slash).
        prompt_chars: Number of ``"x"`` characters to place in the prompt.

    Returns:
        The ``requests`` response object for the POST.
    """
    payload = {
        "model": "test-model",
        "prompt": "x" * prompt_chars,
        "max_tokens": 1,
        "stream": False,
    }
    return requests.post(
        f"{base_url}/v1/completions",
        json=payload,
        timeout=_REQUEST_TIMEOUT_S,
    )


@pytest.mark.integration
def test_payload_size_limit(router_manager, mock_workers):
    """Check request handling around the router's ``max_payload_size`` setting.

    Starts one mock worker and a round-robin router configured with
    ``max_payload_size`` of 1 MiB, then asserts that a ~0.5 MiB request
    returns 200 and a ~1.2 MiB request returns 413.
    """
    limit_bytes = 1 * _MIB

    # Start one backend and a router with a 1MB payload limit
    _, urls, _ = mock_workers(n=1)
    rh = router_manager.start_router(
        worker_urls=urls,
        policy="round_robin",
        extra={"max_payload_size": limit_bytes},
    )

    # Payload well under the limit (~0.5MB prompt) should succeed
    small = _post_completion(rh.url, limit_bytes // 2)
    assert small.status_code == 200

    # Payload over the limit (~1.2MB prompt) should fail with 413
    large = _post_completion(rh.url, int(1.2 * limit_bytes))
    assert large.status_code == 413
|