|
|
|
|
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac
|
|
|
|
|
This supports testing the Response API against real cloud providers.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import socket
|
|
|
|
|
import subprocess
|
|
|
|
|
@@ -16,6 +17,8 @@ from typing import Optional
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def wait_for_workers_ready(
|
|
|
|
|
router_url: str,
|
|
|
|
|
@@ -50,9 +53,11 @@ def wait_for_workers_ready(
|
|
|
|
|
attempt += 1
|
|
|
|
|
elapsed = int(time.time() - start_time)
|
|
|
|
|
|
|
|
|
|
# Print progress every 10 seconds
|
|
|
|
|
# Log progress every 10 seconds
|
|
|
|
|
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
|
|
|
|
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
|
|
|
|
|
logger.info(
|
|
|
|
|
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
response = session.get(
|
|
|
|
|
@@ -63,7 +68,7 @@ def wait_for_workers_ready(
|
|
|
|
|
total_workers = data.get("total", 0)
|
|
|
|
|
|
|
|
|
|
if total_workers == expected_workers:
|
|
|
|
|
print(
|
|
|
|
|
logger.info(
|
|
|
|
|
f" All {expected_workers} workers connected after {elapsed}s"
|
|
|
|
|
)
|
|
|
|
|
return
|
|
|
|
|
@@ -124,16 +129,18 @@ def wait_for_router_ready(
|
|
|
|
|
attempt += 1
|
|
|
|
|
elapsed = int(time.time() - start_time)
|
|
|
|
|
|
|
|
|
|
# Print progress every 10 seconds
|
|
|
|
|
# Log progress every 10 seconds
|
|
|
|
|
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
|
|
|
|
print(f" Still waiting for router... ({elapsed}/{timeout}s elapsed)")
|
|
|
|
|
logger.info(
|
|
|
|
|
f" Still waiting for router... ({elapsed}/{timeout}s elapsed)"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
response = session.get(
|
|
|
|
|
f"{router_url}/health", headers=headers, timeout=5
|
|
|
|
|
)
|
|
|
|
|
if response.status_code == 200:
|
|
|
|
|
print(f" Router ready after {elapsed}s")
|
|
|
|
|
logger.info(f" Router ready after {elapsed}s")
|
|
|
|
|
return
|
|
|
|
|
else:
|
|
|
|
|
last_error = f"HTTP {response.status_code}"
|
|
|
|
|
@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
|
|
|
|
|
else:
|
|
|
|
|
router_port = find_free_port()
|
|
|
|
|
|
|
|
|
|
print(f"\n{'='*70}")
|
|
|
|
|
print(f"Launching {backend.upper()} router")
|
|
|
|
|
print(f"{'='*70}")
|
|
|
|
|
print(f" Backend: {backend}")
|
|
|
|
|
print(f" Router port: {router_port}")
|
|
|
|
|
print(f" History backend: {history_backend}")
|
|
|
|
|
logger.info(f"\n{'='*70}")
|
|
|
|
|
logger.info(f"Launching {backend.upper()} router")
|
|
|
|
|
logger.info(f"{'='*70}")
|
|
|
|
|
logger.info(f" Backend: {backend}")
|
|
|
|
|
logger.info(f" Router port: {router_port}")
|
|
|
|
|
logger.info(f" History backend: {history_backend}")
|
|
|
|
|
|
|
|
|
|
# Determine worker URL based on backend
|
|
|
|
|
if backend == "openai":
|
|
|
|
|
@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError(f"Unsupported backend: {backend}")
|
|
|
|
|
|
|
|
|
|
print(f" Worker URL: {worker_url}")
|
|
|
|
|
logger.info(f" Worker URL: {worker_url}")
|
|
|
|
|
|
|
|
|
|
# Build router command
|
|
|
|
|
router_cmd = [
|
|
|
|
|
@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
|
|
|
|
|
router_cmd.extend(router_args)
|
|
|
|
|
|
|
|
|
|
if show_output:
|
|
|
|
|
print(f" Command: {' '.join(router_cmd)}")
|
|
|
|
|
logger.info(f" Command: {' '.join(router_cmd)}")
|
|
|
|
|
|
|
|
|
|
# Set up environment with backend API key
|
|
|
|
|
env = os.environ.copy()
|
|
|
|
|
@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
wait_for_router_ready(router_url, timeout=timeout, api_key=None)
|
|
|
|
|
print(f"✓ Router ready at {router_url}")
|
|
|
|
|
logger.info(f"✓ Router ready at {router_url}")
|
|
|
|
|
except TimeoutError:
|
|
|
|
|
print(f"✗ Router failed to start")
|
|
|
|
|
logger.error(f"✗ Router failed to start")
|
|
|
|
|
# Cleanup: kill router
|
|
|
|
|
try:
|
|
|
|
|
router_proc.kill()
|
|
|
|
|
@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
|
|
|
|
|
pass
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
print(f"\n{'='*70}")
|
|
|
|
|
print(f"✓ {backend.upper()} router ready!")
|
|
|
|
|
print(f" Router: {router_url}")
|
|
|
|
|
print(f"{'='*70}\n")
|
|
|
|
|
logger.info(f"\n{'='*70}")
|
|
|
|
|
logger.info(f"✓ {backend.upper()} router ready!")
|
|
|
|
|
logger.info(f" Router: {router_url}")
|
|
|
|
|
logger.info(f"{'='*70}\n")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"router": router_proc,
|
|
|
|
|
@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
|
|
|
|
|
else:
|
|
|
|
|
router_port = find_free_port()
|
|
|
|
|
|
|
|
|
|
print(f"\n{'='*70}")
|
|
|
|
|
print(f"Launching gRPC cluster (separate workers + router)")
|
|
|
|
|
print(f"{'='*70}")
|
|
|
|
|
print(f" Model: {model}")
|
|
|
|
|
print(f" Router port: {router_port}")
|
|
|
|
|
print(f" Workers: {num_workers}")
|
|
|
|
|
print(f" TP size: {tp_size}")
|
|
|
|
|
print(f" Policy: {policy}")
|
|
|
|
|
logger.info(f"\n{'='*70}")
|
|
|
|
|
logger.info(f"Launching gRPC cluster (separate workers + router)")
|
|
|
|
|
logger.info(f"{'='*70}")
|
|
|
|
|
logger.info(f" Model: {model}")
|
|
|
|
|
logger.info(f" Router port: {router_port}")
|
|
|
|
|
logger.info(f" Workers: {num_workers}")
|
|
|
|
|
logger.info(f" TP size: {tp_size}")
|
|
|
|
|
logger.info(f" Policy: {policy}")
|
|
|
|
|
|
|
|
|
|
# Step 1: Launch workers with gRPC enabled
|
|
|
|
|
workers = []
|
|
|
|
|
@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
|
|
|
|
|
worker_url = f"grpc://127.0.0.1:{worker_port}"
|
|
|
|
|
worker_urls.append(worker_url)
|
|
|
|
|
|
|
|
|
|
print(f"\n[Worker {i+1}/{num_workers}]")
|
|
|
|
|
print(f" Port: {worker_port}")
|
|
|
|
|
print(f" URL: {worker_url}")
|
|
|
|
|
logger.info(f"\n[Worker {i+1}/{num_workers}]")
|
|
|
|
|
logger.info(f" Port: {worker_port}")
|
|
|
|
|
logger.info(f" URL: {worker_url}")
|
|
|
|
|
|
|
|
|
|
# Build worker command
|
|
|
|
|
worker_cmd = [
|
|
|
|
|
@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
workers.append(worker_proc)
|
|
|
|
|
print(f" PID: {worker_proc.pid}")
|
|
|
|
|
logger.info(f" PID: {worker_proc.pid}")
|
|
|
|
|
|
|
|
|
|
# Give workers a moment to start binding to ports
|
|
|
|
|
# The router will check worker health when it starts
|
|
|
|
|
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
|
|
|
|
|
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
|
|
|
|
|
time.sleep(20)
|
|
|
|
|
|
|
|
|
|
# Quick check: make sure worker processes are still alive
|
|
|
|
|
for i, worker in enumerate(workers):
|
|
|
|
|
if worker.poll() is not None:
|
|
|
|
|
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
|
|
|
|
|
logger.error(
|
|
|
|
|
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
|
|
|
|
|
)
|
|
|
|
|
# Cleanup: kill all workers
|
|
|
|
|
for w in workers:
|
|
|
|
|
try:
|
|
|
|
|
@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
|
|
|
|
|
pass
|
|
|
|
|
raise RuntimeError(f"Worker {i+1} failed to start")
|
|
|
|
|
|
|
|
|
|
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
|
|
|
|
|
logger.info(
|
|
|
|
|
f"✓ All {num_workers} workers started (router will verify connectivity)"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Step 2: Launch router pointing to workers
|
|
|
|
|
print(f"\n[Router]")
|
|
|
|
|
print(f" Port: {router_port}")
|
|
|
|
|
print(f" Worker URLs: {', '.join(worker_urls)}")
|
|
|
|
|
logger.info(f"\n[Router]")
|
|
|
|
|
logger.info(f" Port: {router_port}")
|
|
|
|
|
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
|
|
|
|
|
|
|
|
|
|
# Build router command
|
|
|
|
|
router_cmd = [
|
|
|
|
|
@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
|
|
|
|
|
router_cmd.extend(router_args)
|
|
|
|
|
|
|
|
|
|
if show_output:
|
|
|
|
|
print(f" Command: {' '.join(router_cmd)}")
|
|
|
|
|
logger.info(f" Command: {' '.join(router_cmd)}")
|
|
|
|
|
|
|
|
|
|
# Launch router
|
|
|
|
|
if show_output:
|
|
|
|
|
@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
|
|
|
|
|
stderr=subprocess.PIPE,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
print(f" PID: {router_proc.pid}")
|
|
|
|
|
logger.info(f" PID: {router_proc.pid}")
|
|
|
|
|
|
|
|
|
|
# Wait for router to be ready
|
|
|
|
|
router_url = f"http://127.0.0.1:{router_port}"
|
|
|
|
|
print(f"\nWaiting for router to start at {router_url}...")
|
|
|
|
|
logger.info(f"\nWaiting for router to start at {router_url}...")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
wait_for_workers_ready(
|
|
|
|
|
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
|
|
|
|
|
)
|
|
|
|
|
print(f"✓ Router ready at {router_url}")
|
|
|
|
|
logger.info(f"✓ Router ready at {router_url}")
|
|
|
|
|
except TimeoutError:
|
|
|
|
|
print(f"✗ Router failed to start")
|
|
|
|
|
logger.error(f"✗ Router failed to start")
|
|
|
|
|
# Cleanup: kill router and all workers
|
|
|
|
|
try:
|
|
|
|
|
router_proc.kill()
|
|
|
|
|
@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
|
|
|
|
|
pass
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
print(f"\n{'='*70}")
|
|
|
|
|
print(f"✓ gRPC cluster ready!")
|
|
|
|
|
print(f" Router: {router_url}")
|
|
|
|
|
print(f" Workers: {len(workers)}")
|
|
|
|
|
print(f"{'='*70}\n")
|
|
|
|
|
logger.info(f"\n{'='*70}")
|
|
|
|
|
logger.info(f"✓ gRPC cluster ready!")
|
|
|
|
|
logger.info(f" Router: {router_url}")
|
|
|
|
|
logger.info(f" Workers: {len(workers)}")
|
|
|
|
|
logger.info(f"{'='*70}\n")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"workers": workers,
|
|
|
|
|
|