[router][CI] Clean up imports and print statements in sgl-router/py_test (#12024)

This commit is contained in:
Chang Su
2025-10-23 11:56:57 -07:00
committed by GitHub
parent 8bd26dd4e6
commit 28b8a4064d
31 changed files with 115 additions and 153 deletions

View File

@@ -13,14 +13,11 @@ Run with:
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))
@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
try:
js_obj = json.loads(text)
except (TypeError, json.decoder.JSONDecodeError):
print("JSONDecodeError", text)
raise
assert isinstance(js_obj["name"], str)
assert isinstance(js_obj["population"], int)

View File

@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
import sys
from pathlib import Path
import pytest
import pytest # noqa: F401
# Ensure router py_src is importable
_ROUTER_ROOT = Path(__file__).resolve().parents[2]

View File

@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
import asyncio
import json
import os
import sys
import time
import unittest
# CHANGE: Import router launcher instead of server launcher
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
from fixtures import popen_launch_workers_and_router
from util import (
DEFAULT_ENABLE_THINKING_MODEL_PATH,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
get_tokenizer,
kill_process_tree,
)
@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning = False
has_content = False
print("\n=== Stream With Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")
@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning = False
has_content = False
print("\n=== Stream Without Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")

View File

@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))

View File

@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
This approach gives more control and matches production deployment patterns.
"""
import logging
import socket
import subprocess
import time
@@ -15,6 +16,8 @@ from typing import Optional
import requests
logger = logging.getLogger(__name__)
def find_free_port() -> int:
"""Find an available port on localhost."""
@@ -56,9 +59,11 @@ def wait_for_workers_ready(
attempt += 1
elapsed = int(time.time() - start_time)
# Print progress every 10 seconds
# Log progress every 10 seconds
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
logger.info(
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
)
try:
response = session.get(
@@ -69,7 +74,7 @@ def wait_for_workers_ready(
total_workers = data.get("total", 0)
if total_workers == expected_workers:
print(
logger.info(
f" All {expected_workers} workers connected after {elapsed}s"
)
return
@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
else:
router_port = find_free_port()
print(f"\n{'='*70}")
print(f"Launching gRPC cluster (separate workers + router)")
print(f"{'='*70}")
print(f" Model: {model}")
print(f" Router port: {router_port}")
print(f" Workers: {num_workers}")
print(f" TP size: {tp_size}")
print(f" Policy: {policy}")
logger.info(f"\n{'='*70}")
logger.info(f"Launching gRPC cluster (separate workers + router)")
logger.info(f"{'='*70}")
logger.info(f" Model: {model}")
logger.info(f" Router port: {router_port}")
logger.info(f" Workers: {num_workers}")
logger.info(f" TP size: {tp_size}")
logger.info(f" Policy: {policy}")
# Step 1: Launch workers with gRPC enabled
workers = []
@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
worker_url = f"grpc://127.0.0.1:{worker_port}"
worker_urls.append(worker_url)
print(f"\n[Worker {i+1}/{num_workers}]")
print(f" Port: {worker_port}")
print(f" URL: {worker_url}")
logger.info(f"\n[Worker {i+1}/{num_workers}]")
logger.info(f" Port: {worker_port}")
logger.info(f" URL: {worker_url}")
# Build worker command
worker_cmd = [
@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
)
workers.append(worker_proc)
print(f" PID: {worker_proc.pid}")
logger.info(f" PID: {worker_proc.pid}")
# Give workers a moment to start binding to ports
# The router will check worker health when it starts
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
time.sleep(20)
# Quick check: make sure worker processes are still alive
for i, worker in enumerate(workers):
if worker.poll() is not None:
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
logger.error(
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
)
# Cleanup: kill all workers
for w in workers:
try:
@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
pass
raise RuntimeError(f"Worker {i+1} failed to start")
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
logger.info(
f"✓ All {num_workers} workers started (router will verify connectivity)"
)
# Step 2: Launch router pointing to workers
print(f"\n[Router]")
print(f" Port: {router_port}")
print(f" Worker URLs: {', '.join(worker_urls)}")
logger.info(f"\n[Router]")
logger.info(f" Port: {router_port}")
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
# Build router command
router_cmd = [
@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
router_cmd.extend(router_args)
if show_output:
print(f" Command: {' '.join(router_cmd)}")
logger.info(f" Command: {' '.join(router_cmd)}")
# Launch router
if show_output:
@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
stderr=subprocess.PIPE,
)
print(f" PID: {router_proc.pid}")
logger.info(f" PID: {router_proc.pid}")
# Wait for router to be ready
router_url = f"http://127.0.0.1:{router_port}"
print(f"\nWaiting for router to start at {router_url}...")
logger.info(f"\nWaiting for router to start at {router_url}...")
try:
wait_for_workers_ready(
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
)
print(f"✓ Router ready at {router_url}")
logger.info(f"✓ Router ready at {router_url}")
except TimeoutError:
print(f"✗ Router failed to start")
logger.error(f"✗ Router failed to start")
# Cleanup: kill router and all workers
try:
router_proc.kill()
@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
pass
raise
print(f"\n{'='*70}")
print(f"✓ gRPC cluster ready!")
print(f" Router: {router_url}")
print(f" Workers: {len(workers)}")
print(f"{'='*70}\n")
logger.info(f"\n{'='*70}")
logger.info(f"✓ gRPC cluster ready!")
logger.info(f" Router: {router_url}")
logger.info(f" Workers: {len(workers)}")
logger.info(f"{'='*70}\n")
return {
"workers": workers,

View File

@@ -13,10 +13,7 @@ Run with:
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
from pathlib import Path

View File

@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path

View File

@@ -8,6 +8,7 @@ Extracted and adapted from:
- sglang.test.test_utils (constants and CustomTestCase)
"""
import logging
import os
import signal
import threading
@@ -17,6 +18,8 @@ from typing import Optional, Union
import psutil
logger = logging.getLogger(__name__)
try:
from transformers import (
AutoTokenizer,
@@ -204,8 +207,8 @@ def get_tokenizer(
raise RuntimeError(err_msg) from e
if not isinstance(tokenizer, PreTrainedTokenizerFast):
print(
f"Warning: Using a slow tokenizer. This might cause a performance "
logger.warning(
f"Using a slow tokenizer. This might cause a performance "
f"degradation. Consider using a fast tokenizer instead."
)
@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
return super(CustomTestCase, self)._callTestMethod(method)
except Exception as e:
if attempt < max_retry:
print(
logger.info(
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
)
continue
else:
# Last attempt, re-raise the exception
raise
def setUp(self):
"""Print test method name at the start of each test."""
print(f"[Test Method] {self._testMethodName}", flush=True)

View File

@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
"""
import os
# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
self.stderr.flush()
lines = open(STDERR_FILENAME).readlines()
for line in lines[pt:]:
print(line, end="", flush=True)
if f"#running-req: {num_requests}" in line:
all_requests_running = True
pt = -1

View File

@@ -12,7 +12,6 @@ Run with:
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
"""
# CHANGE: Import router launcher instead of server launcher
import sys
from pathlib import Path