sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

This commit is contained in:
maxiao1
2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
import concurrent.futures
import subprocess
import time
import pytest
import requests
@pytest.mark.integration
def test_worker_crash_reroute_with_retries(router_manager, mock_workers):
# Start one healthy and one that will crash on first request
_, [ok_url], _ = mock_workers(n=1)
_, [crash_url], _ = mock_workers(n=1, args=["--crash-on-request"])
rh = router_manager.start_router(
worker_urls=[crash_url, ok_url],
policy="round_robin",
extra={
"retry_max_retries": 3,
"retry_initial_backoff_ms": 10,
"retry_max_backoff_ms": 50,
},
)
# A single request should succeed via retry to the healthy worker
r = requests.post(
f"{rh.url}/v1/completions",
json={
"model": "test-model",
"prompt": "crash",
"max_tokens": 1,
"stream": False,
},
timeout=5,
)
assert r.status_code == 200
# mock_workers fixture handles cleanup