[router][CI] Clean up imports and print statements in sgl-router/py_test (#12024)
This commit is contained in:
@@ -30,7 +30,7 @@ repos:
|
|||||||
args:
|
args:
|
||||||
- --select=F401,F821
|
- --select=F401,F821
|
||||||
- --fix
|
- --fix
|
||||||
files: ^(benchmark/|docs/|examples/|python/sglang/)
|
files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)
|
||||||
exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
|
exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
|
||||||
- repo: https://github.com/psf/black
|
- repo: https://github.com/psf/black
|
||||||
rev: 24.10.0
|
rev: 24.10.0
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import time
|
import time
|
||||||
from types import SimpleNamespace
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
|||||||
@@ -13,14 +13,11 @@ Run with:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
import requests
|
|
||||||
|
|
||||||
_TEST_DIR = Path(__file__).parent
|
_TEST_DIR = Path(__file__).parent
|
||||||
sys.path.insert(0, str(_TEST_DIR.parent))
|
sys.path.insert(0, str(_TEST_DIR.parent))
|
||||||
@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
|
|||||||
try:
|
try:
|
||||||
js_obj = json.loads(text)
|
js_obj = json.loads(text)
|
||||||
except (TypeError, json.decoder.JSONDecodeError):
|
except (TypeError, json.decoder.JSONDecodeError):
|
||||||
print("JSONDecodeError", text)
|
|
||||||
raise
|
raise
|
||||||
assert isinstance(js_obj["name"], str)
|
assert isinstance(js_obj["name"], str)
|
||||||
assert isinstance(js_obj["population"], int)
|
assert isinstance(js_obj["population"], int)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest # noqa: F401
|
||||||
|
|
||||||
# Ensure router py_src is importable
|
# Ensure router py_src is importable
|
||||||
_ROUTER_ROOT = Path(__file__).resolve().parents[2]
|
_ROUTER_ROOT = Path(__file__).resolve().parents[2]
|
||||||
|
|||||||
@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
|
|||||||
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
|
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import json
|
import json
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import openai
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
_TEST_DIR = Path(__file__).parent
|
_TEST_DIR = Path(__file__).parent
|
||||||
@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
|
|||||||
from fixtures import popen_launch_workers_and_router
|
from fixtures import popen_launch_workers_and_router
|
||||||
from util import (
|
from util import (
|
||||||
DEFAULT_ENABLE_THINKING_MODEL_PATH,
|
DEFAULT_ENABLE_THINKING_MODEL_PATH,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
CustomTestCase,
|
CustomTestCase,
|
||||||
get_tokenizer,
|
|
||||||
kill_process_tree,
|
kill_process_tree,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
|
|||||||
has_reasoning = False
|
has_reasoning = False
|
||||||
has_content = False
|
has_content = False
|
||||||
|
|
||||||
print("\n=== Stream With Reasoning ===")
|
|
||||||
for line in response.iter_lines():
|
for line in response.iter_lines():
|
||||||
if line:
|
if line:
|
||||||
line = line.decode("utf-8")
|
line = line.decode("utf-8")
|
||||||
@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
|
|||||||
has_reasoning = False
|
has_reasoning = False
|
||||||
has_content = False
|
has_content = False
|
||||||
|
|
||||||
print("\n=== Stream Without Reasoning ===")
|
|
||||||
for line in response.iter_lines():
|
for line in response.iter_lines():
|
||||||
if line:
|
if line:
|
||||||
line = line.decode("utf-8")
|
line = line.decode("utf-8")
|
||||||
|
|||||||
@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
|
|||||||
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
|
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
import requests
|
|
||||||
|
|
||||||
_TEST_DIR = Path(__file__).parent
|
_TEST_DIR = Path(__file__).parent
|
||||||
sys.path.insert(0, str(_TEST_DIR.parent))
|
sys.path.insert(0, str(_TEST_DIR.parent))
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
|
|||||||
This approach gives more control and matches production deployment patterns.
|
This approach gives more control and matches production deployment patterns.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
@@ -15,6 +16,8 @@ from typing import Optional
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def find_free_port() -> int:
|
def find_free_port() -> int:
|
||||||
"""Find an available port on localhost."""
|
"""Find an available port on localhost."""
|
||||||
@@ -56,9 +59,11 @@ def wait_for_workers_ready(
|
|||||||
attempt += 1
|
attempt += 1
|
||||||
elapsed = int(time.time() - start_time)
|
elapsed = int(time.time() - start_time)
|
||||||
|
|
||||||
# Print progress every 10 seconds
|
# Log progress every 10 seconds
|
||||||
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
||||||
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
|
logger.info(
|
||||||
|
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = session.get(
|
response = session.get(
|
||||||
@@ -69,7 +74,7 @@ def wait_for_workers_ready(
|
|||||||
total_workers = data.get("total", 0)
|
total_workers = data.get("total", 0)
|
||||||
|
|
||||||
if total_workers == expected_workers:
|
if total_workers == expected_workers:
|
||||||
print(
|
logger.info(
|
||||||
f" All {expected_workers} workers connected after {elapsed}s"
|
f" All {expected_workers} workers connected after {elapsed}s"
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
|
|||||||
else:
|
else:
|
||||||
router_port = find_free_port()
|
router_port = find_free_port()
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
logger.info(f"\n{'='*70}")
|
||||||
print(f"Launching gRPC cluster (separate workers + router)")
|
logger.info(f"Launching gRPC cluster (separate workers + router)")
|
||||||
print(f"{'='*70}")
|
logger.info(f"{'='*70}")
|
||||||
print(f" Model: {model}")
|
logger.info(f" Model: {model}")
|
||||||
print(f" Router port: {router_port}")
|
logger.info(f" Router port: {router_port}")
|
||||||
print(f" Workers: {num_workers}")
|
logger.info(f" Workers: {num_workers}")
|
||||||
print(f" TP size: {tp_size}")
|
logger.info(f" TP size: {tp_size}")
|
||||||
print(f" Policy: {policy}")
|
logger.info(f" Policy: {policy}")
|
||||||
|
|
||||||
# Step 1: Launch workers with gRPC enabled
|
# Step 1: Launch workers with gRPC enabled
|
||||||
workers = []
|
workers = []
|
||||||
@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
|
|||||||
worker_url = f"grpc://127.0.0.1:{worker_port}"
|
worker_url = f"grpc://127.0.0.1:{worker_port}"
|
||||||
worker_urls.append(worker_url)
|
worker_urls.append(worker_url)
|
||||||
|
|
||||||
print(f"\n[Worker {i+1}/{num_workers}]")
|
logger.info(f"\n[Worker {i+1}/{num_workers}]")
|
||||||
print(f" Port: {worker_port}")
|
logger.info(f" Port: {worker_port}")
|
||||||
print(f" URL: {worker_url}")
|
logger.info(f" URL: {worker_url}")
|
||||||
|
|
||||||
# Build worker command
|
# Build worker command
|
||||||
worker_cmd = [
|
worker_cmd = [
|
||||||
@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
|
|||||||
)
|
)
|
||||||
|
|
||||||
workers.append(worker_proc)
|
workers.append(worker_proc)
|
||||||
print(f" PID: {worker_proc.pid}")
|
logger.info(f" PID: {worker_proc.pid}")
|
||||||
|
|
||||||
# Give workers a moment to start binding to ports
|
# Give workers a moment to start binding to ports
|
||||||
# The router will check worker health when it starts
|
# The router will check worker health when it starts
|
||||||
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
|
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
|
||||||
time.sleep(20)
|
time.sleep(20)
|
||||||
|
|
||||||
# Quick check: make sure worker processes are still alive
|
# Quick check: make sure worker processes are still alive
|
||||||
for i, worker in enumerate(workers):
|
for i, worker in enumerate(workers):
|
||||||
if worker.poll() is not None:
|
if worker.poll() is not None:
|
||||||
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
|
logger.error(
|
||||||
|
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
|
||||||
|
)
|
||||||
# Cleanup: kill all workers
|
# Cleanup: kill all workers
|
||||||
for w in workers:
|
for w in workers:
|
||||||
try:
|
try:
|
||||||
@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
|
|||||||
pass
|
pass
|
||||||
raise RuntimeError(f"Worker {i+1} failed to start")
|
raise RuntimeError(f"Worker {i+1} failed to start")
|
||||||
|
|
||||||
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
|
logger.info(
|
||||||
|
f"✓ All {num_workers} workers started (router will verify connectivity)"
|
||||||
|
)
|
||||||
|
|
||||||
# Step 2: Launch router pointing to workers
|
# Step 2: Launch router pointing to workers
|
||||||
print(f"\n[Router]")
|
logger.info(f"\n[Router]")
|
||||||
print(f" Port: {router_port}")
|
logger.info(f" Port: {router_port}")
|
||||||
print(f" Worker URLs: {', '.join(worker_urls)}")
|
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
|
||||||
|
|
||||||
# Build router command
|
# Build router command
|
||||||
router_cmd = [
|
router_cmd = [
|
||||||
@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
|
|||||||
router_cmd.extend(router_args)
|
router_cmd.extend(router_args)
|
||||||
|
|
||||||
if show_output:
|
if show_output:
|
||||||
print(f" Command: {' '.join(router_cmd)}")
|
logger.info(f" Command: {' '.join(router_cmd)}")
|
||||||
|
|
||||||
# Launch router
|
# Launch router
|
||||||
if show_output:
|
if show_output:
|
||||||
@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
|
|||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f" PID: {router_proc.pid}")
|
logger.info(f" PID: {router_proc.pid}")
|
||||||
|
|
||||||
# Wait for router to be ready
|
# Wait for router to be ready
|
||||||
router_url = f"http://127.0.0.1:{router_port}"
|
router_url = f"http://127.0.0.1:{router_port}"
|
||||||
print(f"\nWaiting for router to start at {router_url}...")
|
logger.info(f"\nWaiting for router to start at {router_url}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
wait_for_workers_ready(
|
wait_for_workers_ready(
|
||||||
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
|
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
|
||||||
)
|
)
|
||||||
print(f"✓ Router ready at {router_url}")
|
logger.info(f"✓ Router ready at {router_url}")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
print(f"✗ Router failed to start")
|
logger.error(f"✗ Router failed to start")
|
||||||
# Cleanup: kill router and all workers
|
# Cleanup: kill router and all workers
|
||||||
try:
|
try:
|
||||||
router_proc.kill()
|
router_proc.kill()
|
||||||
@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
|
|||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
logger.info(f"\n{'='*70}")
|
||||||
print(f"✓ gRPC cluster ready!")
|
logger.info(f"✓ gRPC cluster ready!")
|
||||||
print(f" Router: {router_url}")
|
logger.info(f" Router: {router_url}")
|
||||||
print(f" Workers: {len(workers)}")
|
logger.info(f" Workers: {len(workers)}")
|
||||||
print(f"{'='*70}\n")
|
logger.info(f"{'='*70}\n")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"workers": workers,
|
"workers": workers,
|
||||||
|
|||||||
@@ -13,10 +13,7 @@ Run with:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|||||||
@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ Extracted and adapted from:
|
|||||||
- sglang.test.test_utils (constants and CustomTestCase)
|
- sglang.test.test_utils (constants and CustomTestCase)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import threading
|
import threading
|
||||||
@@ -17,6 +18,8 @@ from typing import Optional, Union
|
|||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
@@ -204,8 +207,8 @@ def get_tokenizer(
|
|||||||
raise RuntimeError(err_msg) from e
|
raise RuntimeError(err_msg) from e
|
||||||
|
|
||||||
if not isinstance(tokenizer, PreTrainedTokenizerFast):
|
if not isinstance(tokenizer, PreTrainedTokenizerFast):
|
||||||
print(
|
logger.warning(
|
||||||
f"Warning: Using a slow tokenizer. This might cause a performance "
|
f"Using a slow tokenizer. This might cause a performance "
|
||||||
f"degradation. Consider using a fast tokenizer instead."
|
f"degradation. Consider using a fast tokenizer instead."
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
|
|||||||
return super(CustomTestCase, self)._callTestMethod(method)
|
return super(CustomTestCase, self)._callTestMethod(method)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if attempt < max_retry:
|
if attempt < max_retry:
|
||||||
print(
|
logger.info(
|
||||||
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
|
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# Last attempt, re-raise the exception
|
# Last attempt, re-raise the exception
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
"""Print test method name at the start of each test."""
|
|
||||||
print(f"[Test Method] {self._testMethodName}", flush=True)
|
|
||||||
|
|||||||
@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import unittest
|
import unittest
|
||||||
@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
|
|||||||
self.stderr.flush()
|
self.stderr.flush()
|
||||||
lines = open(STDERR_FILENAME).readlines()
|
lines = open(STDERR_FILENAME).readlines()
|
||||||
for line in lines[pt:]:
|
for line in lines[pt:]:
|
||||||
print(line, end="", flush=True)
|
|
||||||
if f"#running-req: {num_requests}" in line:
|
if f"#running-req: {num_requests}" in line:
|
||||||
all_requests_running = True
|
all_requests_running = True
|
||||||
pt = -1
|
pt = -1
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ Run with:
|
|||||||
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
|
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# CHANGE: Import router launcher instead of server launcher
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests.
|
|||||||
This configures pytest to not collect base test classes that are meant to be inherited.
|
This configures pytest to not collect base test classes that are meant to be inherited.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pytest
|
import pytest # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
def pytest_collection_modifyitems(config, items):
|
def pytest_collection_modifyitems(config, items):
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest):
|
|||||||
self.assertEqual(resp.status_code, 200)
|
self.assertEqual(resp.status_code, 200)
|
||||||
|
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
print(f"MCP response: {data}")
|
|
||||||
|
|
||||||
# Basic response structure
|
# Basic response structure
|
||||||
self.assertIn("id", data)
|
self.assertIn("id", data)
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac
|
|||||||
This supports testing the Response API against real cloud providers.
|
This supports testing the Response API against real cloud providers.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -16,6 +17,8 @@ from typing import Optional
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def wait_for_workers_ready(
|
def wait_for_workers_ready(
|
||||||
router_url: str,
|
router_url: str,
|
||||||
@@ -50,9 +53,11 @@ def wait_for_workers_ready(
|
|||||||
attempt += 1
|
attempt += 1
|
||||||
elapsed = int(time.time() - start_time)
|
elapsed = int(time.time() - start_time)
|
||||||
|
|
||||||
# Print progress every 10 seconds
|
# Log progress every 10 seconds
|
||||||
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
||||||
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
|
logger.info(
|
||||||
|
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = session.get(
|
response = session.get(
|
||||||
@@ -63,7 +68,7 @@ def wait_for_workers_ready(
|
|||||||
total_workers = data.get("total", 0)
|
total_workers = data.get("total", 0)
|
||||||
|
|
||||||
if total_workers == expected_workers:
|
if total_workers == expected_workers:
|
||||||
print(
|
logger.info(
|
||||||
f" All {expected_workers} workers connected after {elapsed}s"
|
f" All {expected_workers} workers connected after {elapsed}s"
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
@@ -124,16 +129,18 @@ def wait_for_router_ready(
|
|||||||
attempt += 1
|
attempt += 1
|
||||||
elapsed = int(time.time() - start_time)
|
elapsed = int(time.time() - start_time)
|
||||||
|
|
||||||
# Print progress every 10 seconds
|
# Log progress every 10 seconds
|
||||||
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
|
||||||
print(f" Still waiting for router... ({elapsed}/{timeout}s elapsed)")
|
logger.info(
|
||||||
|
f" Still waiting for router... ({elapsed}/{timeout}s elapsed)"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = session.get(
|
response = session.get(
|
||||||
f"{router_url}/health", headers=headers, timeout=5
|
f"{router_url}/health", headers=headers, timeout=5
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
print(f" Router ready after {elapsed}s")
|
logger.info(f" Router ready after {elapsed}s")
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
last_error = f"HTTP {response.status_code}"
|
last_error = f"HTTP {response.status_code}"
|
||||||
@@ -204,12 +211,12 @@ def popen_launch_openai_xai_router(
|
|||||||
else:
|
else:
|
||||||
router_port = find_free_port()
|
router_port = find_free_port()
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
logger.info(f"\n{'='*70}")
|
||||||
print(f"Launching {backend.upper()} router")
|
logger.info(f"Launching {backend.upper()} router")
|
||||||
print(f"{'='*70}")
|
logger.info(f"{'='*70}")
|
||||||
print(f" Backend: {backend}")
|
logger.info(f" Backend: {backend}")
|
||||||
print(f" Router port: {router_port}")
|
logger.info(f" Router port: {router_port}")
|
||||||
print(f" History backend: {history_backend}")
|
logger.info(f" History backend: {history_backend}")
|
||||||
|
|
||||||
# Determine worker URL based on backend
|
# Determine worker URL based on backend
|
||||||
if backend == "openai":
|
if backend == "openai":
|
||||||
@@ -231,7 +238,7 @@ def popen_launch_openai_xai_router(
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported backend: {backend}")
|
raise ValueError(f"Unsupported backend: {backend}")
|
||||||
|
|
||||||
print(f" Worker URL: {worker_url}")
|
logger.info(f" Worker URL: {worker_url}")
|
||||||
|
|
||||||
# Build router command
|
# Build router command
|
||||||
router_cmd = [
|
router_cmd = [
|
||||||
@@ -266,7 +273,7 @@ def popen_launch_openai_xai_router(
|
|||||||
router_cmd.extend(router_args)
|
router_cmd.extend(router_args)
|
||||||
|
|
||||||
if show_output:
|
if show_output:
|
||||||
print(f" Command: {' '.join(router_cmd)}")
|
logger.info(f" Command: {' '.join(router_cmd)}")
|
||||||
|
|
||||||
# Set up environment with backend API key
|
# Set up environment with backend API key
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
@@ -299,9 +306,9 @@ def popen_launch_openai_xai_router(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
wait_for_router_ready(router_url, timeout=timeout, api_key=None)
|
wait_for_router_ready(router_url, timeout=timeout, api_key=None)
|
||||||
print(f"✓ Router ready at {router_url}")
|
logger.info(f"✓ Router ready at {router_url}")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
print(f"✗ Router failed to start")
|
logger.error(f"✗ Router failed to start")
|
||||||
# Cleanup: kill router
|
# Cleanup: kill router
|
||||||
try:
|
try:
|
||||||
router_proc.kill()
|
router_proc.kill()
|
||||||
@@ -309,10 +316,10 @@ def popen_launch_openai_xai_router(
|
|||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
logger.info(f"\n{'='*70}")
|
||||||
print(f"✓ {backend.upper()} router ready!")
|
logger.info(f"✓ {backend.upper()} router ready!")
|
||||||
print(f" Router: {router_url}")
|
logger.info(f" Router: {router_url}")
|
||||||
print(f"{'='*70}\n")
|
logger.info(f"{'='*70}\n")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"router": router_proc,
|
"router": router_proc,
|
||||||
@@ -382,14 +389,14 @@ def popen_launch_workers_and_router(
|
|||||||
else:
|
else:
|
||||||
router_port = find_free_port()
|
router_port = find_free_port()
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
logger.info(f"\n{'='*70}")
|
||||||
print(f"Launching gRPC cluster (separate workers + router)")
|
logger.info(f"Launching gRPC cluster (separate workers + router)")
|
||||||
print(f"{'='*70}")
|
logger.info(f"{'='*70}")
|
||||||
print(f" Model: {model}")
|
logger.info(f" Model: {model}")
|
||||||
print(f" Router port: {router_port}")
|
logger.info(f" Router port: {router_port}")
|
||||||
print(f" Workers: {num_workers}")
|
logger.info(f" Workers: {num_workers}")
|
||||||
print(f" TP size: {tp_size}")
|
logger.info(f" TP size: {tp_size}")
|
||||||
print(f" Policy: {policy}")
|
logger.info(f" Policy: {policy}")
|
||||||
|
|
||||||
# Step 1: Launch workers with gRPC enabled
|
# Step 1: Launch workers with gRPC enabled
|
||||||
workers = []
|
workers = []
|
||||||
@@ -400,9 +407,9 @@ def popen_launch_workers_and_router(
|
|||||||
worker_url = f"grpc://127.0.0.1:{worker_port}"
|
worker_url = f"grpc://127.0.0.1:{worker_port}"
|
||||||
worker_urls.append(worker_url)
|
worker_urls.append(worker_url)
|
||||||
|
|
||||||
print(f"\n[Worker {i+1}/{num_workers}]")
|
logger.info(f"\n[Worker {i+1}/{num_workers}]")
|
||||||
print(f" Port: {worker_port}")
|
logger.info(f" Port: {worker_port}")
|
||||||
print(f" URL: {worker_url}")
|
logger.info(f" URL: {worker_url}")
|
||||||
|
|
||||||
# Build worker command
|
# Build worker command
|
||||||
worker_cmd = [
|
worker_cmd = [
|
||||||
@@ -447,17 +454,19 @@ def popen_launch_workers_and_router(
|
|||||||
)
|
)
|
||||||
|
|
||||||
workers.append(worker_proc)
|
workers.append(worker_proc)
|
||||||
print(f" PID: {worker_proc.pid}")
|
logger.info(f" PID: {worker_proc.pid}")
|
||||||
|
|
||||||
# Give workers a moment to start binding to ports
|
# Give workers a moment to start binding to ports
|
||||||
# The router will check worker health when it starts
|
# The router will check worker health when it starts
|
||||||
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
|
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
|
||||||
time.sleep(20)
|
time.sleep(20)
|
||||||
|
|
||||||
# Quick check: make sure worker processes are still alive
|
# Quick check: make sure worker processes are still alive
|
||||||
for i, worker in enumerate(workers):
|
for i, worker in enumerate(workers):
|
||||||
if worker.poll() is not None:
|
if worker.poll() is not None:
|
||||||
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
|
logger.error(
|
||||||
|
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
|
||||||
|
)
|
||||||
# Cleanup: kill all workers
|
# Cleanup: kill all workers
|
||||||
for w in workers:
|
for w in workers:
|
||||||
try:
|
try:
|
||||||
@@ -466,12 +475,14 @@ def popen_launch_workers_and_router(
|
|||||||
pass
|
pass
|
||||||
raise RuntimeError(f"Worker {i+1} failed to start")
|
raise RuntimeError(f"Worker {i+1} failed to start")
|
||||||
|
|
||||||
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
|
logger.info(
|
||||||
|
f"✓ All {num_workers} workers started (router will verify connectivity)"
|
||||||
|
)
|
||||||
|
|
||||||
# Step 2: Launch router pointing to workers
|
# Step 2: Launch router pointing to workers
|
||||||
print(f"\n[Router]")
|
logger.info(f"\n[Router]")
|
||||||
print(f" Port: {router_port}")
|
logger.info(f" Port: {router_port}")
|
||||||
print(f" Worker URLs: {', '.join(worker_urls)}")
|
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
|
||||||
|
|
||||||
# Build router command
|
# Build router command
|
||||||
router_cmd = [
|
router_cmd = [
|
||||||
@@ -505,7 +516,7 @@ def popen_launch_workers_and_router(
|
|||||||
router_cmd.extend(router_args)
|
router_cmd.extend(router_args)
|
||||||
|
|
||||||
if show_output:
|
if show_output:
|
||||||
print(f" Command: {' '.join(router_cmd)}")
|
logger.info(f" Command: {' '.join(router_cmd)}")
|
||||||
|
|
||||||
# Launch router
|
# Launch router
|
||||||
if show_output:
|
if show_output:
|
||||||
@@ -517,19 +528,19 @@ def popen_launch_workers_and_router(
|
|||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f" PID: {router_proc.pid}")
|
logger.info(f" PID: {router_proc.pid}")
|
||||||
|
|
||||||
# Wait for router to be ready
|
# Wait for router to be ready
|
||||||
router_url = f"http://127.0.0.1:{router_port}"
|
router_url = f"http://127.0.0.1:{router_port}"
|
||||||
print(f"\nWaiting for router to start at {router_url}...")
|
logger.info(f"\nWaiting for router to start at {router_url}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
wait_for_workers_ready(
|
wait_for_workers_ready(
|
||||||
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
|
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
|
||||||
)
|
)
|
||||||
print(f"✓ Router ready at {router_url}")
|
logger.info(f"✓ Router ready at {router_url}")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
print(f"✗ Router failed to start")
|
logger.error(f"✗ Router failed to start")
|
||||||
# Cleanup: kill router and all workers
|
# Cleanup: kill router and all workers
|
||||||
try:
|
try:
|
||||||
router_proc.kill()
|
router_proc.kill()
|
||||||
@@ -542,11 +553,11 @@ def popen_launch_workers_and_router(
|
|||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
logger.info(f"\n{'='*70}")
|
||||||
print(f"✓ gRPC cluster ready!")
|
logger.info(f"✓ gRPC cluster ready!")
|
||||||
print(f" Router: {router_url}")
|
logger.info(f" Router: {router_url}")
|
||||||
print(f" Workers: {len(workers)}")
|
logger.info(f" Workers: {len(workers)}")
|
||||||
print(f"{'='*70}\n")
|
logger.info(f"{'='*70}\n")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"workers": workers,
|
"workers": workers,
|
||||||
|
|||||||
@@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest):
|
|||||||
resp = self.create_response(
|
resp = self.create_response(
|
||||||
"Test", previous_response_id="resp_invalid123", max_output_tokens=50
|
"Test", previous_response_id="resp_invalid123", max_output_tokens=50
|
||||||
)
|
)
|
||||||
# Should return 404 or 400 for invalid response ID
|
|
||||||
if resp.status_code != 200:
|
|
||||||
print(f"\n❌ Response creation failed!")
|
|
||||||
print(f"Status: {resp.status_code}")
|
|
||||||
print(f"Response: {resp.text}")
|
|
||||||
self.assertIn(resp.status_code, [400, 404])
|
self.assertIn(resp.status_code, [400, 404])
|
||||||
|
|
||||||
def test_conversation_with_multiple_turns(self):
|
def test_conversation_with_multiple_turns(self):
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
Utility functions for Response API e2e tests.
|
Utility functions for Response API e2e tests.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import threading
|
import threading
|
||||||
@@ -9,6 +10,8 @@ import unittest
|
|||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None):
|
def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None):
|
||||||
"""
|
"""
|
||||||
@@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase):
|
|||||||
return super(CustomTestCase, self)._callTestMethod(method)
|
return super(CustomTestCase, self)._callTestMethod(method)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if attempt < max_retry:
|
if attempt < max_retry:
|
||||||
print(
|
logger.info(
|
||||||
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
|
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# Last attempt, re-raise the exception
|
# Last attempt, re-raise the exception
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
"""Print test method name at the start of each test."""
|
|
||||||
print(f"[Test Method] {self._testMethodName}", flush=True)
|
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
import os
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Iterable, List, Optional, Tuple
|
from typing import Iterable, List, Optional, Tuple
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager):
|
|||||||
urls = urls_slow + urls_fast
|
urls = urls_slow + urls_fast
|
||||||
ids = ids_slow + ids_fast
|
ids = ids_slow + ids_fast
|
||||||
slow_id = ids_slow[0]
|
slow_id = ids_slow[0]
|
||||||
|
slow_url = urls_slow[0]
|
||||||
|
|
||||||
rh = router_manager.start_router(
|
rh = router_manager.start_router(
|
||||||
worker_urls=urls,
|
worker_urls=urls,
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import collections
|
import collections
|
||||||
import math
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import collections
|
import collections
|
||||||
import time
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|||||||
@@ -1,7 +1,3 @@
|
|||||||
import concurrent.futures
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import collections
|
import collections
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import subprocess
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import time
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|||||||
@@ -1,7 +1,3 @@
|
|||||||
import concurrent.futures
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,3 @@
|
|||||||
import collections
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,7 @@ These tests focus on testing the argument parsing logic in isolation,
|
|||||||
without starting actual router instances.
|
without starting actual router instances.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from sglang_router.launch_router import RouterArgs, parse_router_args
|
from sglang_router.launch_router import RouterArgs, parse_router_args
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ These tests focus on testing the router configuration logic in isolation,
|
|||||||
including validation of configuration parameters and their interactions.
|
including validation of configuration parameters and their interactions.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from types import SimpleNamespace
|
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|||||||
@@ -6,8 +6,7 @@ including router initialization, configuration validation, and startup flow.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from types import SimpleNamespace
|
from unittest.mock import MagicMock, patch
|
||||||
from unittest.mock import MagicMock, call, patch
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from sglang_router.launch_router import RouterArgs, launch_router
|
from sglang_router.launch_router import RouterArgs, launch_router
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ These tests focus on testing the validation logic in isolation,
|
|||||||
including parameter validation, URL validation, and configuration validation.
|
including parameter validation, URL validation, and configuration validation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from types import SimpleNamespace
|
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|||||||
Reference in New Issue
Block a user