From 28b8a4064d2a376fcdb901ad1ffb9432fc8850bc Mon Sep 17 00:00:00 2001 From: Chang Su Date: Thu, 23 Oct 2025 11:56:57 -0700 Subject: [PATCH] [router][CI] Clean up imports and print statements in sgl-router/py_test (#12024) --- .pre-commit-config.yaml | 2 +- sgl-router/py_test/e2e/test_e2e_embeddings.py | 1 - sgl-router/py_test/e2e/test_pd_router.py | 1 - .../e2e_grpc/basic/test_openai_server.py | 4 - sgl-router/py_test/e2e_grpc/conftest.py | 2 +- .../e2e_grpc/features/test_enable_thinking.py | 10 -- .../features/test_reasoning_content.py | 4 - sgl-router/py_test/e2e_grpc/fixtures.py | 71 +++++++----- .../test_openai_function_calling.py | 3 - .../function_call/test_tool_choice.py | 2 - sgl-router/py_test/e2e_grpc/util.py | 13 +-- .../validation/test_large_max_new_tokens.py | 3 - .../test_openai_server_ignore_eos.py | 1 - .../py_test/e2e_response_api/conftest.py | 2 +- sgl-router/py_test/e2e_response_api/mcp.py | 1 - .../e2e_response_api/router_fixtures.py | 107 ++++++++++-------- .../e2e_response_api/state_management.py | 5 - sgl-router/py_test/e2e_response_api/util.py | 9 +- sgl-router/py_test/integration/conftest.py | 3 +- .../load_balancing/test_power_of_two.py | 1 + .../integration/load_balancing/test_random.py | 1 - .../load_balancing/test_round_robin.py | 1 - .../integration/test_fault_tolerance.py | 4 - .../py_test/integration/test_pd_routing.py | 1 - .../py_test/integration/test_rate_limiting.py | 1 - .../py_test/integration/test_retries.py | 4 - .../integration/test_worker_management.py | 4 - sgl-router/py_test/unit/test_arg_parser.py | 2 - sgl-router/py_test/unit/test_router_config.py | 1 - .../py_test/unit/test_startup_sequence.py | 3 +- sgl-router/py_test/unit/test_validation.py | 1 - 31 files changed, 115 insertions(+), 153 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 70a145b5a..21afe122c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: args: - --select=F401,F821 - --fix - 
files: ^(benchmark/|docs/|examples/|python/sglang/) + files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*) exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$ - repo: https://github.com/psf/black rev: 24.10.0 diff --git a/sgl-router/py_test/e2e/test_e2e_embeddings.py b/sgl-router/py_test/e2e/test_e2e_embeddings.py index 1b852ef7e..31727e949 100644 --- a/sgl-router/py_test/e2e/test_e2e_embeddings.py +++ b/sgl-router/py_test/e2e/test_e2e_embeddings.py @@ -1,5 +1,4 @@ import time -from types import SimpleNamespace import pytest import requests diff --git a/sgl-router/py_test/e2e/test_pd_router.py b/sgl-router/py_test/e2e/test_pd_router.py index eccbad4d1..4827cbb4f 100644 --- a/sgl-router/py_test/e2e/test_pd_router.py +++ b/sgl-router/py_test/e2e/test_pd_router.py @@ -1,5 +1,4 @@ import logging -import os import socket import subprocess import time diff --git a/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py b/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py index bc9cd05eb..689e7e464 100644 --- a/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py +++ b/sgl-router/py_test/e2e_grpc/basic/test_openai_server.py @@ -13,14 +13,11 @@ Run with: """ import json - -# CHANGE: Import router launcher instead of server launcher import sys import unittest from pathlib import Path import openai -import requests _TEST_DIR = Path(__file__).parent sys.path.insert(0, str(_TEST_DIR.parent)) @@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase): try: js_obj = json.loads(text) except (TypeError, json.decoder.JSONDecodeError): - print("JSONDecodeError", text) raise assert isinstance(js_obj["name"], str) assert isinstance(js_obj["population"], int) diff --git a/sgl-router/py_test/e2e_grpc/conftest.py b/sgl-router/py_test/e2e_grpc/conftest.py index 2bc671eaa..7eda72ea8 100644 --- 
a/sgl-router/py_test/e2e_grpc/conftest.py +++ b/sgl-router/py_test/e2e_grpc/conftest.py @@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes import sys from pathlib import Path -import pytest +import pytest # noqa: F401 # Ensure router py_src is importable _ROUTER_ROOT = Path(__file__).resolve().parents[2] diff --git a/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py b/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py index 9d1dcf25a..a5042c372 100644 --- a/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py +++ b/sgl-router/py_test/e2e_grpc/features/test_enable_thinking.py @@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning """ -import asyncio import json -import os import sys -import time import unittest - -# CHANGE: Import router launcher instead of server launcher from pathlib import Path -import openai import requests _TEST_DIR = Path(__file__).parent @@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent)) from fixtures import popen_launch_workers_and_router from util import ( DEFAULT_ENABLE_THINKING_MODEL_PATH, - DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, - get_tokenizer, kill_process_tree, ) @@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase): has_reasoning = False has_content = False - print("\n=== Stream With Reasoning ===") for line in response.iter_lines(): if line: line = line.decode("utf-8") @@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase): has_reasoning = False has_content = False - print("\n=== Stream Without Reasoning ===") for line in response.iter_lines(): if line: line = line.decode("utf-8") diff --git a/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py b/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py index 
2654e0a31..28f7e01b1 100644 --- a/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py +++ b/sgl-router/py_test/e2e_grpc/features/test_reasoning_content.py @@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming """ -import json - -# CHANGE: Import router launcher instead of server launcher import sys import unittest from pathlib import Path import openai -import requests _TEST_DIR = Path(__file__).parent sys.path.insert(0, str(_TEST_DIR.parent)) diff --git a/sgl-router/py_test/e2e_grpc/fixtures.py b/sgl-router/py_test/e2e_grpc/fixtures.py index 869c70167..b988c8939 100644 --- a/sgl-router/py_test/e2e_grpc/fixtures.py +++ b/sgl-router/py_test/e2e_grpc/fixtures.py @@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ This approach gives more control and matches production deployment patterns. """ +import logging import socket import subprocess import time @@ -15,6 +16,8 @@ from typing import Optional import requests +logger = logging.getLogger(__name__) + def find_free_port() -> int: """Find an available port on localhost.""" @@ -56,9 +59,11 @@ def wait_for_workers_ready( attempt += 1 elapsed = int(time.time() - start_time) - # Print progress every 10 seconds + # Log progress every 10 seconds if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0: - print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)") + logger.info( + f" Still waiting for workers... 
({elapsed}/{timeout}s elapsed)" + ) try: response = session.get( @@ -69,7 +74,7 @@ def wait_for_workers_ready( total_workers = data.get("total", 0) if total_workers == expected_workers: - print( + logger.info( f" All {expected_workers} workers connected after {elapsed}s" ) return @@ -161,14 +166,14 @@ def popen_launch_workers_and_router( else: router_port = find_free_port() - print(f"\n{'='*70}") - print(f"Launching gRPC cluster (separate workers + router)") - print(f"{'='*70}") - print(f" Model: {model}") - print(f" Router port: {router_port}") - print(f" Workers: {num_workers}") - print(f" TP size: {tp_size}") - print(f" Policy: {policy}") + logger.info(f"\n{'='*70}") + logger.info(f"Launching gRPC cluster (separate workers + router)") + logger.info(f"{'='*70}") + logger.info(f" Model: {model}") + logger.info(f" Router port: {router_port}") + logger.info(f" Workers: {num_workers}") + logger.info(f" TP size: {tp_size}") + logger.info(f" Policy: {policy}") # Step 1: Launch workers with gRPC enabled workers = [] @@ -179,9 +184,9 @@ def popen_launch_workers_and_router( worker_url = f"grpc://127.0.0.1:{worker_port}" worker_urls.append(worker_url) - print(f"\n[Worker {i+1}/{num_workers}]") - print(f" Port: {worker_port}") - print(f" URL: {worker_url}") + logger.info(f"\n[Worker {i+1}/{num_workers}]") + logger.info(f" Port: {worker_port}") + logger.info(f" URL: {worker_url}") # Build worker command worker_cmd = [ @@ -226,17 +231,19 @@ def popen_launch_workers_and_router( ) workers.append(worker_proc) - print(f" PID: {worker_proc.pid}") + logger.info(f" PID: {worker_proc.pid}") # Give workers a moment to start binding to ports # The router will check worker health when it starts - print(f"\nWaiting for {num_workers} workers to initialize (20s)...") + logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...") time.sleep(20) # Quick check: make sure worker processes are still alive for i, worker in enumerate(workers): if worker.poll() is not None: - 
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})") + logger.error( + f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})" + ) # Cleanup: kill all workers for w in workers: try: @@ -245,12 +252,14 @@ def popen_launch_workers_and_router( pass raise RuntimeError(f"Worker {i+1} failed to start") - print(f"✓ All {num_workers} workers started (router will verify connectivity)") + logger.info( + f"✓ All {num_workers} workers started (router will verify connectivity)" + ) # Step 2: Launch router pointing to workers - print(f"\n[Router]") - print(f" Port: {router_port}") - print(f" Worker URLs: {', '.join(worker_urls)}") + logger.info(f"\n[Router]") + logger.info(f" Port: {router_port}") + logger.info(f" Worker URLs: {', '.join(worker_urls)}") # Build router command router_cmd = [ @@ -284,7 +293,7 @@ def popen_launch_workers_and_router( router_cmd.extend(router_args) if show_output: - print(f" Command: {' '.join(router_cmd)}") + logger.info(f" Command: {' '.join(router_cmd)}") # Launch router if show_output: @@ -296,19 +305,19 @@ def popen_launch_workers_and_router( stderr=subprocess.PIPE, ) - print(f" PID: {router_proc.pid}") + logger.info(f" PID: {router_proc.pid}") # Wait for router to be ready router_url = f"http://127.0.0.1:{router_port}" - print(f"\nWaiting for router to start at {router_url}...") + logger.info(f"\nWaiting for router to start at {router_url}...") try: wait_for_workers_ready( router_url, expected_workers=num_workers, timeout=180, api_key=api_key ) - print(f"✓ Router ready at {router_url}") + logger.info(f"✓ Router ready at {router_url}") except TimeoutError: - print(f"✗ Router failed to start") + logger.error(f"✗ Router failed to start") # Cleanup: kill router and all workers try: router_proc.kill() @@ -321,11 +330,11 @@ def popen_launch_workers_and_router( pass raise - print(f"\n{'='*70}") - print(f"✓ gRPC cluster ready!") - print(f" Router: {router_url}") - print(f" Workers: {len(workers)}") - 
print(f"{'='*70}\n") + logger.info(f"\n{'='*70}") + logger.info(f"✓ gRPC cluster ready!") + logger.info(f" Router: {router_url}") + logger.info(f" Workers: {len(workers)}") + logger.info(f"{'='*70}\n") return { "workers": workers, diff --git a/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py b/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py index 9343e0c59..096a7cbc7 100644 --- a/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py +++ b/sgl-router/py_test/e2e_grpc/function_call/test_openai_function_calling.py @@ -13,10 +13,7 @@ Run with: """ import json - -# CHANGE: Import router launcher instead of server launcher import sys -import time import unittest from pathlib import Path diff --git a/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py b/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py index 9c8d41cfe..64d5895db 100644 --- a/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py +++ b/sgl-router/py_test/e2e_grpc/function_call/test_tool_choice.py @@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s """ import json - -# CHANGE: Import router launcher instead of server launcher import sys import unittest from pathlib import Path diff --git a/sgl-router/py_test/e2e_grpc/util.py b/sgl-router/py_test/e2e_grpc/util.py index 4dd51221f..cc99c3c38 100644 --- a/sgl-router/py_test/e2e_grpc/util.py +++ b/sgl-router/py_test/e2e_grpc/util.py @@ -8,6 +8,7 @@ Extracted and adapted from: - sglang.test.test_utils (constants and CustomTestCase) """ +import logging import os import signal import threading @@ -17,6 +18,8 @@ from typing import Optional, Union import psutil +logger = logging.getLogger(__name__) + try: from transformers import ( AutoTokenizer, @@ -204,8 +207,8 @@ def get_tokenizer( raise RuntimeError(err_msg) from e if not isinstance(tokenizer, PreTrainedTokenizerFast): - print( - f"Warning: Using a slow tokenizer. 
This might cause a performance " + logger.warning( + f"Using a slow tokenizer. This might cause a performance " f"degradation. Consider using a fast tokenizer instead." ) @@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase): return super(CustomTestCase, self)._callTestMethod(method) except Exception as e: if attempt < max_retry: - print( + logger.info( f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..." ) continue else: # Last attempt, re-raise the exception raise - - def setUp(self): - """Print test method name at the start of each test.""" - print(f"[Test Method] {self._testMethodName}", flush=True) diff --git a/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py b/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py index 1c7879a78..2ea486973 100644 --- a/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py +++ b/sgl-router/py_test/e2e_grpc/validation/test_large_max_new_tokens.py @@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge """ import os - -# CHANGE: Import router launcher instead of server launcher import sys import time import unittest @@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase): self.stderr.flush() lines = open(STDERR_FILENAME).readlines() for line in lines[pt:]: - print(line, end="", flush=True) if f"#running-req: {num_requests}" in line: all_requests_running = True pt = -1 diff --git a/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py b/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py index 396618797..0b255de4e 100644 --- a/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py +++ b/sgl-router/py_test/e2e_grpc/validation/test_openai_server_ignore_eos.py @@ -12,7 +12,6 @@ Run with: pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v """ -# CHANGE: Import router launcher instead of server launcher import sys from pathlib import 
Path diff --git a/sgl-router/py_test/e2e_response_api/conftest.py b/sgl-router/py_test/e2e_response_api/conftest.py index 04bd6c453..96c3fc966 100644 --- a/sgl-router/py_test/e2e_response_api/conftest.py +++ b/sgl-router/py_test/e2e_response_api/conftest.py @@ -4,7 +4,7 @@ pytest configuration for e2e_response_api tests. This configures pytest to not collect base test classes that are meant to be inherited. """ -import pytest +import pytest # noqa: F401 def pytest_collection_modifyitems(config, items): diff --git a/sgl-router/py_test/e2e_response_api/mcp.py b/sgl-router/py_test/e2e_response_api/mcp.py index 57dda72c8..b4b33ba67 100644 --- a/sgl-router/py_test/e2e_response_api/mcp.py +++ b/sgl-router/py_test/e2e_response_api/mcp.py @@ -32,7 +32,6 @@ class MCPTests(ResponseAPIBaseTest): self.assertEqual(resp.status_code, 200) data = resp.json() - print(f"MCP response: {data}") # Basic response structure self.assertIn("id", data) diff --git a/sgl-router/py_test/e2e_response_api/router_fixtures.py b/sgl-router/py_test/e2e_response_api/router_fixtures.py index 4cd2f60e4..e2beb60d3 100644 --- a/sgl-router/py_test/e2e_response_api/router_fixtures.py +++ b/sgl-router/py_test/e2e_response_api/router_fixtures.py @@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang router with OpenAI or XAI bac This supports testing the Response API against real cloud providers. """ +import logging import os import socket import subprocess @@ -16,6 +17,8 @@ from typing import Optional import requests +logger = logging.getLogger(__name__) + def wait_for_workers_ready( router_url: str, @@ -50,9 +53,11 @@ def wait_for_workers_ready( attempt += 1 elapsed = int(time.time() - start_time) - # Print progress every 10 seconds + # Log progress every 10 seconds if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0: - print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)") + logger.info( + f" Still waiting for workers... 
({elapsed}/{timeout}s elapsed)" + ) try: response = session.get( @@ -63,7 +68,7 @@ def wait_for_workers_ready( total_workers = data.get("total", 0) if total_workers == expected_workers: - print( + logger.info( f" All {expected_workers} workers connected after {elapsed}s" ) return @@ -124,16 +129,18 @@ def wait_for_router_ready( attempt += 1 elapsed = int(time.time() - start_time) - # Print progress every 10 seconds + # Log progress every 10 seconds if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0: - print(f" Still waiting for router... ({elapsed}/{timeout}s elapsed)") + logger.info( + f" Still waiting for router... ({elapsed}/{timeout}s elapsed)" + ) try: response = session.get( f"{router_url}/health", headers=headers, timeout=5 ) if response.status_code == 200: - print(f" Router ready after {elapsed}s") + logger.info(f" Router ready after {elapsed}s") return else: last_error = f"HTTP {response.status_code}" @@ -204,12 +211,12 @@ def popen_launch_openai_xai_router( else: router_port = find_free_port() - print(f"\n{'='*70}") - print(f"Launching {backend.upper()} router") - print(f"{'='*70}") - print(f" Backend: {backend}") - print(f" Router port: {router_port}") - print(f" History backend: {history_backend}") + logger.info(f"\n{'='*70}") + logger.info(f"Launching {backend.upper()} router") + logger.info(f"{'='*70}") + logger.info(f" Backend: {backend}") + logger.info(f" Router port: {router_port}") + logger.info(f" History backend: {history_backend}") # Determine worker URL based on backend if backend == "openai": @@ -231,7 +238,7 @@ def popen_launch_openai_xai_router( else: raise ValueError(f"Unsupported backend: {backend}") - print(f" Worker URL: {worker_url}") + logger.info(f" Worker URL: {worker_url}") # Build router command router_cmd = [ @@ -266,7 +273,7 @@ def popen_launch_openai_xai_router( router_cmd.extend(router_args) if show_output: - print(f" Command: {' '.join(router_cmd)}") + logger.info(f" Command: {' '.join(router_cmd)}") # Set up 
environment with backend API key env = os.environ.copy() @@ -299,9 +306,9 @@ def popen_launch_openai_xai_router( try: wait_for_router_ready(router_url, timeout=timeout, api_key=None) - print(f"✓ Router ready at {router_url}") + logger.info(f"✓ Router ready at {router_url}") except TimeoutError: - print(f"✗ Router failed to start") + logger.error(f"✗ Router failed to start") # Cleanup: kill router try: router_proc.kill() @@ -309,10 +316,10 @@ def popen_launch_openai_xai_router( pass raise - print(f"\n{'='*70}") - print(f"✓ {backend.upper()} router ready!") - print(f" Router: {router_url}") - print(f"{'='*70}\n") + logger.info(f"\n{'='*70}") + logger.info(f"✓ {backend.upper()} router ready!") + logger.info(f" Router: {router_url}") + logger.info(f"{'='*70}\n") return { "router": router_proc, @@ -382,14 +389,14 @@ def popen_launch_workers_and_router( else: router_port = find_free_port() - print(f"\n{'='*70}") - print(f"Launching gRPC cluster (separate workers + router)") - print(f"{'='*70}") - print(f" Model: {model}") - print(f" Router port: {router_port}") - print(f" Workers: {num_workers}") - print(f" TP size: {tp_size}") - print(f" Policy: {policy}") + logger.info(f"\n{'='*70}") + logger.info(f"Launching gRPC cluster (separate workers + router)") + logger.info(f"{'='*70}") + logger.info(f" Model: {model}") + logger.info(f" Router port: {router_port}") + logger.info(f" Workers: {num_workers}") + logger.info(f" TP size: {tp_size}") + logger.info(f" Policy: {policy}") # Step 1: Launch workers with gRPC enabled workers = [] @@ -400,9 +407,9 @@ def popen_launch_workers_and_router( worker_url = f"grpc://127.0.0.1:{worker_port}" worker_urls.append(worker_url) - print(f"\n[Worker {i+1}/{num_workers}]") - print(f" Port: {worker_port}") - print(f" URL: {worker_url}") + logger.info(f"\n[Worker {i+1}/{num_workers}]") + logger.info(f" Port: {worker_port}") + logger.info(f" URL: {worker_url}") # Build worker command worker_cmd = [ @@ -447,17 +454,19 @@ def 
popen_launch_workers_and_router( ) workers.append(worker_proc) - print(f" PID: {worker_proc.pid}") + logger.info(f" PID: {worker_proc.pid}") # Give workers a moment to start binding to ports # The router will check worker health when it starts - print(f"\nWaiting for {num_workers} workers to initialize (20s)...") + logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...") time.sleep(20) # Quick check: make sure worker processes are still alive for i, worker in enumerate(workers): if worker.poll() is not None: - print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})") + logger.error( + f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})" + ) # Cleanup: kill all workers for w in workers: try: @@ -466,12 +475,14 @@ def popen_launch_workers_and_router( pass raise RuntimeError(f"Worker {i+1} failed to start") - print(f"✓ All {num_workers} workers started (router will verify connectivity)") + logger.info( + f"✓ All {num_workers} workers started (router will verify connectivity)" + ) # Step 2: Launch router pointing to workers - print(f"\n[Router]") - print(f" Port: {router_port}") - print(f" Worker URLs: {', '.join(worker_urls)}") + logger.info(f"\n[Router]") + logger.info(f" Port: {router_port}") + logger.info(f" Worker URLs: {', '.join(worker_urls)}") # Build router command router_cmd = [ @@ -505,7 +516,7 @@ def popen_launch_workers_and_router( router_cmd.extend(router_args) if show_output: - print(f" Command: {' '.join(router_cmd)}") + logger.info(f" Command: {' '.join(router_cmd)}") # Launch router if show_output: @@ -517,19 +528,19 @@ def popen_launch_workers_and_router( stderr=subprocess.PIPE, ) - print(f" PID: {router_proc.pid}") + logger.info(f" PID: {router_proc.pid}") # Wait for router to be ready router_url = f"http://127.0.0.1:{router_port}" - print(f"\nWaiting for router to start at {router_url}...") + logger.info(f"\nWaiting for router to start at {router_url}...") try: wait_for_workers_ready( router_url, 
expected_workers=num_workers, timeout=180, api_key=api_key ) - print(f"✓ Router ready at {router_url}") + logger.info(f"✓ Router ready at {router_url}") except TimeoutError: - print(f"✗ Router failed to start") + logger.error(f"✗ Router failed to start") # Cleanup: kill router and all workers try: router_proc.kill() @@ -542,11 +553,11 @@ def popen_launch_workers_and_router( pass raise - print(f"\n{'='*70}") - print(f"✓ gRPC cluster ready!") - print(f" Router: {router_url}") - print(f" Workers: {len(workers)}") - print(f"{'='*70}\n") + logger.info(f"\n{'='*70}") + logger.info(f"✓ gRPC cluster ready!") + logger.info(f" Router: {router_url}") + logger.info(f" Workers: {len(workers)}") + logger.info(f"{'='*70}\n") return { "workers": workers, diff --git a/sgl-router/py_test/e2e_response_api/state_management.py b/sgl-router/py_test/e2e_response_api/state_management.py index b35049093..a74223cc3 100644 --- a/sgl-router/py_test/e2e_response_api/state_management.py +++ b/sgl-router/py_test/e2e_response_api/state_management.py @@ -49,11 +49,6 @@ class StateManagementTests(ResponseAPIBaseTest): resp = self.create_response( "Test", previous_response_id="resp_invalid123", max_output_tokens=50 ) - # Should return 404 or 400 for invalid response ID - if resp.status_code != 200: - print(f"\n❌ Response creation failed!") - print(f"Status: {resp.status_code}") - print(f"Response: {resp.text}") self.assertIn(resp.status_code, [400, 404]) def test_conversation_with_multiple_turns(self): diff --git a/sgl-router/py_test/e2e_response_api/util.py b/sgl-router/py_test/e2e_response_api/util.py index 74dddf015..222b648b6 100644 --- a/sgl-router/py_test/e2e_response_api/util.py +++ b/sgl-router/py_test/e2e_response_api/util.py @@ -2,6 +2,7 @@ Utility functions for Response API e2e tests. 
""" +import logging import os import signal import threading @@ -9,6 +10,8 @@ import unittest import psutil +logger = logging.getLogger(__name__) + def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = None): """ @@ -69,14 +72,10 @@ class CustomTestCase(unittest.TestCase): return super(CustomTestCase, self)._callTestMethod(method) except Exception as e: if attempt < max_retry: - print( + logger.info( f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..." ) continue else: # Last attempt, re-raise the exception raise - - def setUp(self): - """Print test method name at the start of each test.""" - print(f"[Test Method] {self._testMethodName}", flush=True) diff --git a/sgl-router/py_test/integration/conftest.py b/sgl-router/py_test/integration/conftest.py index 0dc7bc3c3..0de4b6ddf 100644 --- a/sgl-router/py_test/integration/conftest.py +++ b/sgl-router/py_test/integration/conftest.py @@ -1,8 +1,7 @@ -import os import subprocess import time from pathlib import Path -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Iterable, List, Optional, Tuple import pytest import requests diff --git a/sgl-router/py_test/integration/load_balancing/test_power_of_two.py b/sgl-router/py_test/integration/load_balancing/test_power_of_two.py index c56f4d38a..c69c4d89c 100644 --- a/sgl-router/py_test/integration/load_balancing/test_power_of_two.py +++ b/sgl-router/py_test/integration/load_balancing/test_power_of_two.py @@ -17,6 +17,7 @@ def test_power_of_two_prefers_less_loaded(mock_workers, router_manager): urls = urls_slow + urls_fast ids = ids_slow + ids_fast slow_id = ids_slow[0] + slow_url = urls_slow[0] rh = router_manager.start_router( worker_urls=urls, diff --git a/sgl-router/py_test/integration/load_balancing/test_random.py b/sgl-router/py_test/integration/load_balancing/test_random.py index 41a613e12..4662dbce0 100644 --- a/sgl-router/py_test/integration/load_balancing/test_random.py +++ 
b/sgl-router/py_test/integration/load_balancing/test_random.py @@ -1,5 +1,4 @@ import collections -import math import pytest import requests diff --git a/sgl-router/py_test/integration/load_balancing/test_round_robin.py b/sgl-router/py_test/integration/load_balancing/test_round_robin.py index 966f3747a..13f149635 100644 --- a/sgl-router/py_test/integration/load_balancing/test_round_robin.py +++ b/sgl-router/py_test/integration/load_balancing/test_round_robin.py @@ -1,5 +1,4 @@ import collections -import time import pytest import requests diff --git a/sgl-router/py_test/integration/test_fault_tolerance.py b/sgl-router/py_test/integration/test_fault_tolerance.py index 78e5968ce..6cadf1fae 100644 --- a/sgl-router/py_test/integration/test_fault_tolerance.py +++ b/sgl-router/py_test/integration/test_fault_tolerance.py @@ -1,7 +1,3 @@ -import concurrent.futures -import subprocess -import time - import pytest import requests diff --git a/sgl-router/py_test/integration/test_pd_routing.py b/sgl-router/py_test/integration/test_pd_routing.py index d0ae7d552..00919868d 100644 --- a/sgl-router/py_test/integration/test_pd_routing.py +++ b/sgl-router/py_test/integration/test_pd_routing.py @@ -1,6 +1,5 @@ import collections import concurrent.futures -import subprocess import time import pytest diff --git a/sgl-router/py_test/integration/test_rate_limiting.py b/sgl-router/py_test/integration/test_rate_limiting.py index 4297d77c9..960c67a91 100644 --- a/sgl-router/py_test/integration/test_rate_limiting.py +++ b/sgl-router/py_test/integration/test_rate_limiting.py @@ -1,5 +1,4 @@ import concurrent.futures -import time import pytest import requests diff --git a/sgl-router/py_test/integration/test_retries.py b/sgl-router/py_test/integration/test_retries.py index 30826a665..c5f61049b 100644 --- a/sgl-router/py_test/integration/test_retries.py +++ b/sgl-router/py_test/integration/test_retries.py @@ -1,7 +1,3 @@ -import concurrent.futures -import subprocess -import time - import pytest 
import requests diff --git a/sgl-router/py_test/integration/test_worker_management.py b/sgl-router/py_test/integration/test_worker_management.py index 8acb94114..4eace76e0 100644 --- a/sgl-router/py_test/integration/test_worker_management.py +++ b/sgl-router/py_test/integration/test_worker_management.py @@ -1,7 +1,3 @@ -import collections -import subprocess -import time - import pytest import requests diff --git a/sgl-router/py_test/unit/test_arg_parser.py b/sgl-router/py_test/unit/test_arg_parser.py index 0da764ddf..d8881c668 100644 --- a/sgl-router/py_test/unit/test_arg_parser.py +++ b/sgl-router/py_test/unit/test_arg_parser.py @@ -5,9 +5,7 @@ These tests focus on testing the argument parsing logic in isolation, without starting actual router instances. """ -import argparse from types import SimpleNamespace -from unittest.mock import MagicMock, patch import pytest from sglang_router.launch_router import RouterArgs, parse_router_args diff --git a/sgl-router/py_test/unit/test_router_config.py b/sgl-router/py_test/unit/test_router_config.py index ed0d9db4b..51bceb7ba 100644 --- a/sgl-router/py_test/unit/test_router_config.py +++ b/sgl-router/py_test/unit/test_router_config.py @@ -5,7 +5,6 @@ These tests focus on testing the router configuration logic in isolation, including validation of configuration parameters and their interactions. """ -from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest diff --git a/sgl-router/py_test/unit/test_startup_sequence.py b/sgl-router/py_test/unit/test_startup_sequence.py index 133c7eb16..453adb81f 100644 --- a/sgl-router/py_test/unit/test_startup_sequence.py +++ b/sgl-router/py_test/unit/test_startup_sequence.py @@ -6,8 +6,7 @@ including router initialization, configuration validation, and startup flow. 
""" import logging -from types import SimpleNamespace -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch import pytest from sglang_router.launch_router import RouterArgs, launch_router diff --git a/sgl-router/py_test/unit/test_validation.py b/sgl-router/py_test/unit/test_validation.py index 1a3e54612..e6eef3102 100644 --- a/sgl-router/py_test/unit/test_validation.py +++ b/sgl-router/py_test/unit/test_validation.py @@ -5,7 +5,6 @@ These tests focus on testing the validation logic in isolation, including parameter validation, URL validation, and configuration validation. """ -from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest