From 28b8a4064d2a376fcdb901ad1ffb9432fc8850bc Mon Sep 17 00:00:00 2001 From: Chang Su Date: Thu, 23 Oct 2025 11:56:57 -0700 Subject: [PATCH] [router][CI] Clean up imports and print statements in sgl-router/py_test (#12024) --- .pre-commit-config.yaml | 2 +- sgl-router/py_test/e2e/test_e2e_embeddings.py | 1 - sgl-router/py_test/e2e/test_pd_router.py | 1 - .../e2e_grpc/basic/test_openai_server.py | 4 - sgl-router/py_test/e2e_grpc/conftest.py | 2 +- .../e2e_grpc/features/test_enable_thinking.py | 10 -- .../features/test_reasoning_content.py | 4 - sgl-router/py_test/e2e_grpc/fixtures.py | 71 +++++++----- .../test_openai_function_calling.py | 3 - .../function_call/test_tool_choice.py | 2 - sgl-router/py_test/e2e_grpc/util.py | 13 +-- .../validation/test_large_max_new_tokens.py | 3 - .../test_openai_server_ignore_eos.py | 1 - .../py_test/e2e_response_api/conftest.py | 2 +- sgl-router/py_test/e2e_response_api/mcp.py | 1 - .../e2e_response_api/router_fixtures.py | 107 ++++++++++-------- .../e2e_response_api/state_management.py | 5 - sgl-router/py_test/e2e_response_api/util.py | 9 +- sgl-router/py_test/integration/conftest.py | 3 +- .../load_balancing/test_power_of_two.py | 1 + .../integration/load_balancing/test_random.py | 1 - .../load_balancing/test_round_robin.py | 1 - .../integration/test_fault_tolerance.py | 4 - .../py_test/integration/test_pd_routing.py | 1 - .../py_test/integration/test_rate_limiting.py | 1 - .../py_test/integration/test_retries.py | 4 - .../integration/test_worker_management.py | 4 - sgl-router/py_test/unit/test_arg_parser.py | 2 - sgl-router/py_test/unit/test_router_config.py | 1 - .../py_test/unit/test_startup_sequence.py | 3 +- sgl-router/py_test/unit/test_validation.py | 1 - 31 files changed, 115 insertions(+), 153 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 70a145b5a..21afe122c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: args: - --select=F401,F821 - --fix - 
files: ^(benchmark/|docs/|examples/|python/sglang/) + files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*) exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$ - repo: https://github.com/psf/black rev: 24.10.0 diff --git a/sgl-router/py_test/e2e/test_e2e_embeddings.py b/sgl-router/py_test/e2e/test_e2e_embeddings.py index 1b852ef7e..31727e949 100644 --- a/sgl-router/py_test/e2e/test_e2e_embeddings.py +++ b/sgl-router/py_test/e2e/test_e2e_embeddings.py @@ -1,5 +1,4 @@ import time -from types import SimpleNamespace import pytest import requests diff --git a/sgl-router/py_test/e2e/test_pd_router.py b/sgl-router/py_test/e2e/test_pd_router.py index eccbad4d1..4827cbb4f 100644 --- a/sgl-router/py_test/e2e/test_pd_router.py +++ b/sgl-router/py_test/e2e/test_pd_router.py @@ -1,5 +1,4 @@ import logging -import os import socket import subprocess import time diff --git a/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py b/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py index bc9cd05eb..689e7e464 100644 --- a/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py +++ b/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py @@ -13,14 +13,11 @@ Run with: """ import json - -# CHANGE: Import router launcher instead of server launcher import sys import unittest from pathlib import Path import openai -import requests _TEST_DIR = Path(__file__).parent sys.path.insert(0, str(_TEST_DIR.parent)) @@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase): try: js_obj = json.loads(text) except (TypeError, json.decoder.JSONDecodeError): - print("JSONDecodeError", text) raise assert isinstance(js_obj["name"], str) assert isinstance(js_obj["population"], int) diff --git a/sgl-router/py_test/e2e_grpc/conftest.py b/sgl-router/py_test/e2e_grpc/conftest.py index 2bc671eaa..7eda72ea8 100644 --- 
a/sgl-router/py_test/e2e_grpc/conftest.py +++ b/sgl-router/py_test/e2e_grpc/conftest.py @@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes import sys from pathlib import Path -import pytest +import pytest # noqa: F401 # Ensure router py_src is importable _ROUTER_ROOT = Path(__file__).resolve().parents[2] diff --git a/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py b/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py index 9d1dcf25a..a5042c372 100644 --- a/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py +++ b/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py @@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning """ -import asyncio import json -import os import sys -import time import unittest - -# CHANGE: Import router launcher instead of server launcher from pathlib import Path -import openai import requests _TEST_DIR = Path(__file__).parent @@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent)) from fixtures import popen_launch_workers_and_router from util import ( DEFAULT_ENABLE_THINKING_MODEL_PATH, - DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, - get_tokenizer, kill_process_tree, ) @@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase): has_reasoning = False has_content = False - print("\n=== Stream With Reasoning ===") for line in response.iter_lines(): if line: line = line.decode("utf-8") @@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase): has_reasoning = False has_content = False - print("\n=== Stream Without Reasoning ===") for line in response.iter_lines(): if line: line = line.decode("utf-8") diff --git a/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py b/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py index 
2654e0a31..28f7e01b1 100644 --- a/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py +++ b/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py @@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming """ -import json - -# CHANGE: Import router launcher instead of server launcher import sys import unittest from pathlib import Path import openai -import requests _TEST_DIR = Path(__file__).parent sys.path.insert(0, str(_TEST_DIR.parent)) diff --git a/sgl-router/py_test/e2e_grpc/fixtures.py b/sgl-router/py_test/e2e_grpc/fixtures.py index 869c70167..b988c8939 100644 --- a/sgl-router/py_test/e2e_grpc/fixtures.py +++ b/sgl-router/py_test/e2e_grpc/fixtures.py @@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ This approach gives more control and matches production deployment patterns. """ +import logging import socket import subprocess import time @@ -15,6 +16,8 @@ from typing import Optional import requests +logger = logging.getLogger(__name__) + def find_free_port() -> int: """Find an available port on localhost.""" @@ -56,9 +59,11 @@ def wait_for_workers_ready( attempt += 1 elapsed = int(time.time() - start_time) - # Print progress every 10 seconds + # Log progress every 10 seconds if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0: - print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)") + logger.info( + f" Still waiting for workers... 
({elapsed}/{timeout}s elapsed)" + ) try: response = session.get( @@ -69,7 +74,7 @@ def wait_for_workers_ready( total_workers = data.get("total", 0) if total_workers == expected_workers: - print( + logger.info( f" All {expected_workers} workers connected after {elapsed}s" ) return @@ -161,14 +166,14 @@ def popen_launch_workers_and_router( else: router_port = find_free_port() - print(f"\n{'='*70}") - print(f"Launching gRPC cluster (separate workers + router)") - print(f"{'='*70}") - print(f" Model: {model}") - print(f" Router port: {router_port}") - print(f" Workers: {num_workers}") - print(f" TP size: {tp_size}") - print(f" Policy: {policy}") + logger.info(f"\n{'='*70}") + logger.info(f"Launching gRPC cluster (separate workers + router)") + logger.info(f"{'='*70}") + logger.info(f" Model: {model}") + logger.info(f" Router port: {router_port}") + logger.info(f" Workers: {num_workers}") + logger.info(f" TP size: {tp_size}") + logger.info(f" Policy: {policy}") # Step 1: Launch workers with gRPC enabled workers = [] @@ -179,9 +184,9 @@ def popen_launch_workers_and_router( worker_url = f"grpc://127.0.0.1:{worker_port}" worker_urls.append(worker_url) - print(f"\n[Worker {i+1}/{num_workers}]") - print(f" Port: {worker_port}") - print(f" URL: {worker_url}") + logger.info(f"\n[Worker {i+1}/{num_workers}]") + logger.info(f" Port: {worker_port}") + logger.info(f" URL: {worker_url}") # Build worker command worker_cmd = [ @@ -226,17 +231,19 @@ def popen_launch_workers_and_router( ) workers.append(worker_proc) - print(f" PID: {worker_proc.pid}") + logger.info(f" PID: {worker_proc.pid}") # Give workers a moment to start binding to ports # The router will check worker health when it starts - print(f"\nWaiting for {num_workers} workers to initialize (20s)...") + logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...") time.sleep(20) # Quick check: make sure worker processes are still alive for i, worker in enumerate(workers): if worker.poll() is not None: - 
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})") + logger.error( + f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})" + ) # Cleanup: kill all workers for w in workers: try: @@ -245,12 +252,14 @@ def popen_launch_workers_and_router( pass raise RuntimeError(f"Worker {i+1} failed to start") - print(f"✓ All {num_workers} workers started (router will verify connectivity)") + logger.info( + f"✓ All {num_workers} workers started (router will verify connectivity)" + ) # Step 2: Launch router pointing to workers - print(f"\n[Router]") - print(f" Port: {router_port}") - print(f" Worker URLs: {', '.join(worker_urls)}") + logger.info(f"\n[Router]") + logger.info(f" Port: {router_port}") + logger.info(f" Worker URLs: {', '.join(worker_urls)}") # Build router command router_cmd = [ @@ -284,7 +293,7 @@ def popen_launch_workers_and_router( router_cmd.extend(router_args) if show_output: - print(f" Command: {' '.join(router_cmd)}") + logger.info(f" Command: {' '.join(router_cmd)}") # Launch router if show_output: @@ -296,19 +305,19 @@ def popen_launch_workers_and_router( stderr=subprocess.PIPE, ) - print(f" PID: {router_proc.pid}") + logger.info(f" PID: {router_proc.pid}") # Wait for router to be ready router_url = f"http://127.0.0.1:{router_port}" - print(f"\nWaiting for router to start at {router_url}...") + logger.info(f"\nWaiting for router to start at {router_url}...") try: wait_for_workers_ready( router_url, expected_workers=num_workers, timeout=180, api_key=api_key ) - print(f"✓ Router ready at {router_url}") + logger.info(f"✓ Router ready at {router_url}") except TimeoutError: - print(f"✗ Router failed to start") + logger.error(f"✗ Router failed to start") # Cleanup: kill router and all workers try: router_proc.kill() @@ -321,11 +330,11 @@ def popen_launch_workers_and_router( pass raise - print(f"\n{'='*70}") - print(f"✓ gRPC cluster ready!") - print(f" Router: {router_url}") - print(f" Workers: {len(workers)}") - 
print(f"{'='*70}\n") + logger.info(f"\n{'='*70}") + logger.info(f"✓ gRPC cluster ready!") + logger.info(f" Router: {router_url}") + logger.info(f" Workers: {len(workers)}") + logger.info(f"{'='*70}\n") return { "workers": workers, diff --git a/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py b/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py index 9343e0c59..096a7cbc7 100644 --- a/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py +++ b/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py @@ -13,10 +13,7 @@ Run with: """ import json - -# CHANGE: Import router launcher instead of server launcher import sys -import time import unittest from pathlib import Path diff --git a/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py b/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py index 9c8d41cfe..64d5895db 100644 --- a/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py +++ b/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py @@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s """ import json - -# CHANGE: Import router launcher instead of server launcher import sys import unittest from pathlib import Path diff --git a/sgl-router/py_test/e2e_grpc/util.py b/sgl-router/py_test/e2e_grpc/util.py index 4dd51221f..cc99c3c38 100644 --- a/sgl-router/py_test/e2e_grpc/util.py +++ b/sgl-router/py_test/e2e_grpc/util.py @@ -8,6 +8,7 @@ Extracted and adapted from: - sglang.test.test_utils (constants and CustomTestCase) """ +import logging import os import signal import threading @@ -17,6 +18,8 @@ from typing import Optional, Union import psutil +logger = logging.getLogger(__name__) + try: from transformers import ( AutoTokenizer, @@ -204,8 +207,8 @@ def get_tokenizer( raise RuntimeError(err_msg) from e if not isinstance(tokenizer, PreTrainedTokenizerFast): - print( - f"Warning: Using a slow tokenizer. 
This might cause a performance " + logger.warning( + f"Using a slow tokenizer. This might cause a performance " f"degradation. Consider using a fast tokenizer instead." ) @@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase): return super(CustomTestCase, self)._callTestMethod(method) except Exception as e: if attempt < max_retry: - print( + logger.info( f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..." ) continue else: # Last attempt, re-raise the exception raise - - def setUp(self): - """Print test method name at the start of each test.""" - print(f"[Test Method] {self._testMethodName}", flush=True) diff --git a/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py b/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py index 1c7879a78..2ea486973 100644 --- a/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py +++ b/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py @@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge """ import os - -# CHANGE: Import router launcher instead of server launcher import sys import time import unittest @@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase): self.stderr.flush() lines = open(STDERR_FILENAME).readlines() for line in lines[pt:]: - print(line, end="", flush=True) if f"#running-req: {num_requests}" in line: all_requests_running = True pt = -1 diff --git a/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py b/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py index 396618797..0b255de4e 100644 --- a/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py +++ b/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py @@ -12,7 +12,6 @@ Run with: pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v """ -# CHANGE: Import router launcher instead of server launcher import sys from pathlib import 
Path diff --git a/sgl-router/py_test/e2e_response_api/conftest.py b/sgl-router/py_test/e2e_response_api/conftest.py index 04bd6c453..96c3fc966 100644 --- a/sgl-router/py_test/e2e_response_api/conftest.py +++ b/sgl-router/py_test/e2e_response_api/conftest.py @@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests. This configures pytest to not collect base test classes that are meant to be inherited. """ -import pytest +import pytest # noqa: F401 def pytest_collection_modifyitems(config, items): diff --git a/sgl-router/py_test/e2e_response_api/mcp.py b/sgl-router/py_test/e2e_response_api/mcp.py index 57dda72c8..b4b33ba67 100644 --- a/sgl-router/py_test/e2e_response_api/mcp.py +++ b/sgl-router/py_test/e2e_response_api/mcp.py @@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest): self.assertEqual(resp.status_code, 200) data = resp.json() - print(f"MCP response: {data}") # Basic response structure self.assertIn("id", data) diff --git a/sgl-router/py_test/e2e_response_api/router_fixtures.py b/sgl-router/py_test/e2e_response_api/router_fixtures.py index 4cd2f60e4..e2beb60d3 100644 --- a/sgl-router/py_test/e2e_response_api/router_fixtures.py +++ b/sgl-router/py_test/e2e_response_api/router_fixtures.py @@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac This supports testing the Response API against real cloud providers. """ +import logging import os import socket import subprocess @@ -16,6 +17,8 @@ from typing import Optional import requests +logger = logging.getLogger(__name__) + def wait_for_workers_ready( router_url: str, @@ -50,9 +53,11 @@ def wait_for_workers_ready( attempt += 1 elapsed = int(time.time() - start_time) - # Print progress every 10 seconds + # Log progress every 10 seconds if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0: - print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)") + logger.info( + f" Still waiting for workers... 
({elapsed}/{timeout}s elapsed)" + ) try: response = session.get( @@ -63,7 +68,7 @@ def wait_for_workers_ready( total_workers = data.get("total", 0) if total_workers == expected_workers: - print( + logger.info( f" All {expected_workers} workers connected after {elapsed}s" ) return @@ -124,16 +129,18 @@ def wait_for_router_ready( attempt += 1 elapsed = int(time.time() - start_time) - # Print progress every 10 seconds + # Log progress every 10 seconds if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0: - print(f" Still waiting for router... ({elapsed}/{timeout}s elapsed)") + logger.info( + f" Still waiting for router... ({elapsed}/{timeout}s elapsed)" + ) try: response = session.get( f"{router_url}/health", headers=headers, timeout=5 ) if response.status_code == 200: - print(f" Router ready after {elapsed}s") + logger.info(f" Router ready after {elapsed}s") return else: last_error = f"HTTP {response.status_code}" @@ -204,12 +211,12 @@ def popen_launch_openai_xai_router( else: router_port = find_free_port() - print(f"\n{'='*70}") - print(f"Launching {backend.upper()} router") - print(f"{'='*70}") - print(f" Backend: {backend}") - print(f" Router port: {router_port}") - print(f" History backend: {history_backend}") + logger.info(f"\n{'='*70}") + logger.info(f"Launching {backend.upper()} router") + logger.info(f"{'='*70}") + logger.info(f" Backend: {backend}") + logger.info(f" Router port: {router_port}") + logger.info(f" History backend: {history_backend}") # Determine worker URL based on backend if backend == "openai": @@ -231,7 +238,7 @@ def popen_launch_openai_xai_router( else: raise ValueError(f"Unsupported backend: {backend}") - print(f" Worker URL: {worker_url}") + logger.info(f" Worker URL: {worker_url}") # Build router command router_cmd = [ @@ -266,7 +273,7 @@ def popen_launch_openai_xai_router( router_cmd.extend(router_args) if show_output: - print(f" Command: {' '.join(router_cmd)}") + logger.info(f" Command: {' '.join(router_cmd)}") # Set up 
environment with backend API key env = os.environ.copy() @@ -299,9 +306,9 @@ def popen_launch_openai_xai_router( try: wait_for_router_ready(router_url, timeout=timeout, api_key=None) - print(f"✓ Router ready at {router_url}") + logger.info(f"✓ Router ready at {router_url}") except TimeoutError: - print(f"✗ Router failed to start") + logger.error(f"✗ Router failed to start") # Cleanup: kill router try: router_proc.kill() @@ -309,10 +316,10 @@ def popen_launch_openai_xai_router( pass raise - print(f"\n{'='*70}") - print(f"✓ {backend.upper()} router ready!") - print(f" Router: {router_url}") - print(f"{'='*70}\n") + logger.info(f"\n{'='*70}") + logger.info(f"✓ {backend.upper()} router ready!") + logger.info(f" Router: {router_url}") + logger.info(f"{'='*70}\n") return { "router": router_proc, @@ -382,14 +389,14 @@ def popen_launch_workers_and_router( else: router_port = find_free_port() - print(f"\n{'='*70}") - print(f"Launching gRPC cluster (separate workers + router)") - print(f"{'='*70}") - print(f" Model: {model}") - print(f" Router port: {router_port}") - print(f" Workers: {num_workers}") - print(f" TP size: {tp_size}") - print(f" Policy: {policy}") + logger.info(f"\n{'='*70}") + logger.info(f"Launching gRPC cluster (separate workers + router)") + logger.info(f"{'='*70}") + logger.info(f" Model: {model}") + logger.info(f" Router port: {router_port}") + logger.info(f" Workers: {num_workers}") + logger.info(f" TP size: {tp_size}") + logger.info(f" Policy: {policy}") # Step 1: Launch workers with gRPC enabled workers = [] @@ -400,9 +407,9 @@ def popen_launch_workers_and_router( worker_url = f"grpc://127.0.0.1:{worker_port}" worker_urls.append(worker_url) - print(f"\n[Worker {i+1}/{num_workers}]") - print(f" Port: {worker_port}") - print(f" URL: {worker_url}") + logger.info(f"\n[Worker {i+1}/{num_workers}]") + logger.info(f" Port: {worker_port}") + logger.info(f" URL: {worker_url}") # Build worker command worker_cmd = [ @@ -447,17 +454,19 @@ def 
popen_launch_workers_and_router( ) workers.append(worker_proc) - print(f" PID: {worker_proc.pid}") + logger.info(f" PID: {worker_proc.pid}") # Give workers a moment to start binding to ports # The router will check worker health when it starts - print(f"\nWaiting for {num_workers} workers to initialize (20s)...") + logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...") time.sleep(20) # Quick check: make sure worker processes are still alive for i, worker in enumerate(workers): if worker.poll() is not None: - print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})") + logger.error( + f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})" + ) # Cleanup: kill all workers for w in workers: try: @@ -466,12 +475,14 @@ def popen_launch_workers_and_router( pass raise RuntimeError(f"Worker {i+1} failed to start") - print(f"✓ All {num_workers} workers started (router will verify connectivity)") + logger.info( + f"✓ All {num_workers} workers started (router will verify connectivity)" + ) # Step 2: Launch router pointing to workers - print(f"\n[Router]") - print(f" Port: {router_port}") - print(f" Worker URLs: {', '.join(worker_urls)}") + logger.info(f"\n[Router]") + logger.info(f" Port: {router_port}") + logger.info(f" Worker URLs: {', '.join(worker_urls)}") # Build router command router_cmd = [ @@ -505,7 +516,7 @@ def popen_launch_workers_and_router( router_cmd.extend(router_args) if show_output: - print(f" Command: {' '.join(router_cmd)}") + logger.info(f" Command: {' '.join(router_cmd)}") # Launch router if show_output: @@ -517,19 +528,19 @@ def popen_launch_workers_and_router( stderr=subprocess.PIPE, ) - print(f" PID: {router_proc.pid}") + logger.info(f" PID: {router_proc.pid}") # Wait for router to be ready router_url = f"http://127.0.0.1:{router_port}" - print(f"\nWaiting for router to start at {router_url}...") + logger.info(f"\nWaiting for router to start at {router_url}...") try: wait_for_workers_ready( router_url, 
expected_workers=num_workers, timeout=180, api_key=api_key ) - print(f"✓ Router ready at {router_url}") + logger.info(f"✓ Router ready at {router_url}") except TimeoutError: - print(f"✗ Router failed to start") + logger.error(f"✗ Router failed to start") # Cleanup: kill router and all workers try: router_proc.kill() @@ -542,11 +553,11 @@ def popen_launch_workers_and_router( pass raise - print(f"\n{'='*70}") - print(f"✓ gRPC cluster ready!") - print(f" Router: {router_url}") - print(f" Workers: {len(workers)}") - print(f"{'='*70}\n") + logger.info(f"\n{'='*70}") + logger.info(f"✓ gRPC cluster ready!") + logger.info(f" Router: {router_url}") + logger.info(f" Workers: {len(workers)}") + logger.info(f"{'='*70}\n") return { "workers": workers, diff --git a/sgl-router/py_test/e2e_response_api/state_management.py b/sgl-router/py_test/e2e_response_api/state_management.py index b35049093..a74223cc3 100644 --- a/sgl-router/py_test/e2e_response_api/state_management.py +++ b/sgl-router/py_test/e2e_response_api/state_management.py @@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest): resp = self.create_response( "Test", previous_response_id="resp_invalid123", max_output_tokens=50 ) - # Should return 404 or 400 for invalid response ID - if resp.status_code != 200: - print(f"\n❌ Response creation failed!") - print(f"Status: {resp.status_code}") - print(f"Response: {resp.text}") self.assertIn(resp.status_code, [400, 404]) def test_conversation_with_multiple_turns(self): diff --git a/sgl-router/py_test/e2e_response_api/util.py b/sgl-router/py_test/e2e_response_api/util.py index 74dddf015..222b648b6 100644 --- a/sgl-router/py_test/e2e_response_api/util.py +++ b/sgl-router/py_test/e2e_response_api/util.py @@ -2,6 +2,7 @@ Utility functions for Response API e2e tests. 
""" +import logging import os import signal import threading @@ -9,6 +10,8 @@ import unittest import psutil +logger = logging.getLogger(__name__) + def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None): """ @@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase): return super(CustomTestCase, self)._callTestMethod(method) except Exception as e: if attempt < max_retry: - print( + logger.info( f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..." ) continue else: # Last attempt, re-raise the exception raise - - def setUp(self): - """Print test method name at the start of each test.""" - print(f"[Test Method] {self._testMethodName}", flush=True) diff --git a/sgl-router/py_test/integration/conftest.py b/sgl-router/py_test/integration/conftest.py index 0dc7bc3c3..0de4b6ddf 100644 --- a/sgl-router/py_test/integration/conftest.py +++ b/sgl-router/py_test/integration/conftest.py @@ -1,8 +1,7 @@ -import os import subprocess import time from pathlib import Path -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Iterable, List, Optional, Tuple import pytest import requests diff --git a/sgl-router/py_test/integration/load_balancing/test_power_of_two.py b/sgl-router/py_test/integration/load_balancing/test_power_of_two.py index c56f4d38a..c69c4d89c 100644 --- a/sgl-router/py_test/integration/load_balancing/test_power_of_two.py +++ b/sgl-router/py_test/integration/load_balancing/test_power_of_two.py @@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager): urls = urls_slow + urls_fast ids = ids_slow + ids_fast slow_id = ids_slow[0] + slow_url = urls_slow[0] rh = router_manager.start_router( worker_urls=urls, diff --git a/sgl-router/py_test/integration/load_balancing/test_random.py b/sgl-router/py_test/integration/load_balancing/test_random.py index 41a613e12..4662dbce0 100644 --- a/sgl-router/py_test/integration/load_balancing/test_random.py +++ 
b/sgl-router/py_test/integration/load_balancing/test_random.py @@ -1,5 +1,4 @@ import collections -import math import pytest import requests diff --git a/sgl-router/py_test/integration/load_balancing/test_round_robin.py b/sgl-router/py_test/integration/load_balancing/test_round_robin.py index 966f3747a..13f149635 100644 --- a/sgl-router/py_test/integration/load_balancing/test_round_robin.py +++ b/sgl-router/py_test/integration/load_balancing/test_round_robin.py @@ -1,5 +1,4 @@ import collections -import time import pytest import requests diff --git a/sgl-router/py_test/integration/test_fault_tolerance.py b/sgl-router/py_test/integration/test_fault_tolerance.py index 78e5968ce..6cadf1fae 100644 --- a/sgl-router/py_test/integration/test_fault_tolerance.py +++ b/sgl-router/py_test/integration/test_fault_tolerance.py @@ -1,7 +1,3 @@ -import concurrent.futures -import subprocess -import time - import pytest import requests diff --git a/sgl-router/py_test/integration/test_pd_routing.py b/sgl-router/py_test/integration/test_pd_routing.py index d0ae7d552..00919868d 100644 --- a/sgl-router/py_test/integration/test_pd_routing.py +++ b/sgl-router/py_test/integration/test_pd_routing.py @@ -1,6 +1,5 @@ import collections import concurrent.futures -import subprocess import time import pytest diff --git a/sgl-router/py_test/integration/test_rate_limiting.py b/sgl-router/py_test/integration/test_rate_limiting.py index 4297d77c9..960c67a91 100644 --- a/sgl-router/py_test/integration/test_rate_limiting.py +++ b/sgl-router/py_test/integration/test_rate_limiting.py @@ -1,5 +1,4 @@ import concurrent.futures -import time import pytest import requests diff --git a/sgl-router/py_test/integration/test_retries.py b/sgl-router/py_test/integration/test_retries.py index 30826a665..c5f61049b 100644 --- a/sgl-router/py_test/integration/test_retries.py +++ b/sgl-router/py_test/integration/test_retries.py @@ -1,7 +1,3 @@ -import concurrent.futures -import subprocess -import time - import pytest 
import requests diff --git a/sgl-router/py_test/integration/test_worker_management.py b/sgl-router/py_test/integration/test_worker_management.py index 8acb94114..4eace76e0 100644 --- a/sgl-router/py_test/integration/test_worker_management.py +++ b/sgl-router/py_test/integration/test_worker_management.py @@ -1,7 +1,3 @@ -import collections -import subprocess -import time - import pytest import requests diff --git a/sgl-router/py_test/unit/test_arg_parser.py b/sgl-router/py_test/unit/test_arg_parser.py index 0da764ddf..d8881c668 100644 --- a/sgl-router/py_test/unit/test_arg_parser.py +++ b/sgl-router/py_test/unit/test_arg_parser.py @@ -5,9 +5,7 @@ These tests focus on testing the argument parsing logic in isolation, without starting actual router instances. """ -import argparse from types import SimpleNamespace -from unittest.mock import MagicMock, patch import pytest from sglang_router.launch_router import RouterArgs, parse_router_args diff --git a/sgl-router/py_test/unit/test_router_config.py b/sgl-router/py_test/unit/test_router_config.py index ed0d9db4b..51bceb7ba 100644 --- a/sgl-router/py_test/unit/test_router_config.py +++ b/sgl-router/py_test/unit/test_router_config.py @@ -5,7 +5,6 @@ These tests focus on testing the router configuration logic in isolation, including validation of configuration parameters and their interactions. """ -from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest diff --git a/sgl-router/py_test/unit/test_startup_sequence.py b/sgl-router/py_test/unit/test_startup_sequence.py index 133c7eb16..453adb81f 100644 --- a/sgl-router/py_test/unit/test_startup_sequence.py +++ b/sgl-router/py_test/unit/test_startup_sequence.py @@ -6,8 +6,7 @@ including router initialization, configuration validation, and startup flow. 
""" import logging -from types import SimpleNamespace -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch import pytest from sglang_router.launch_router import RouterArgs, launch_router diff --git a/sgl-router/py_test/unit/test_validation.py b/sgl-router/py_test/unit/test_validation.py index 1a3e54612..e6eef3102 100644 --- a/sgl-router/py_test/unit/test_validation.py +++ b/sgl-router/py_test/unit/test_validation.py @@ -5,7 +5,6 @@ These tests focus on testing the validation logic in isolation, including parameter validation, URL validation, and configuration validation. """ -from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest