[router][CI] Clean up imports and print statements in sgl-router/py_test (#12024)

This commit is contained in:
Chang Su
2025-10-23 11:56:57 -07:00
committed by GitHub
parent 8bd26dd4e6
commit 28b8a4064d
31 changed files with 115 additions and 153 deletions

View File

@@ -13,14 +13,11 @@ Run with:
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))
@@ -225,7 +222,6 @@ class TestOpenAIServer(CustomTestCase):
try:
js_obj = json.loads(text)
except (TypeError, json.decoder.JSONDecodeError):
print("JSONDecodeError", text)
raise
assert isinstance(js_obj["name"], str)
assert isinstance(js_obj["population"], int)

View File

@@ -7,7 +7,7 @@ This module provides shared fixtures that can be used across all gRPC router tes
import sys
from pathlib import Path
import pytest
import pytest # noqa: F401
# Ensure router py_src is importable
_ROUTER_ROOT = Path(__file__).resolve().parents[2]

View File

@@ -6,17 +6,11 @@ python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinki
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
import asyncio
import json
import os
import sys
import time
import unittest
# CHANGE: Import router launcher instead of server launcher
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
@@ -24,10 +18,8 @@ sys.path.insert(0, str(_TEST_DIR.parent))
from fixtures import popen_launch_workers_and_router
from util import (
DEFAULT_ENABLE_THINKING_MODEL_PATH,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
get_tokenizer,
kill_process_tree,
)
@@ -131,7 +123,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning = False
has_content = False
print("\n=== Stream With Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")
@@ -176,7 +167,6 @@ class TestEnableThinking(CustomTestCase):
has_reasoning = False
has_content = False
print("\n=== Stream Without Reasoning ===")
for line in response.iter_lines():
if line:
line = line.decode("utf-8")

View File

@@ -9,15 +9,11 @@ python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningC
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path
import openai
import requests
_TEST_DIR = Path(__file__).parent
sys.path.insert(0, str(_TEST_DIR.parent))

View File

@@ -8,6 +8,7 @@ This module provides fixtures for launching SGLang workers and gRPC router separ
This approach gives more control and matches production deployment patterns.
"""
import logging
import socket
import subprocess
import time
@@ -15,6 +16,8 @@ from typing import Optional
import requests
logger = logging.getLogger(__name__)
def find_free_port() -> int:
"""Find an available port on localhost."""
@@ -56,9 +59,11 @@ def wait_for_workers_ready(
attempt += 1
elapsed = int(time.time() - start_time)
# Print progress every 10 seconds
# Log progress every 10 seconds
if elapsed > 0 and elapsed % 10 == 0 and attempt % 10 == 0:
print(f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)")
logger.info(
f" Still waiting for workers... ({elapsed}/{timeout}s elapsed)"
)
try:
response = session.get(
@@ -69,7 +74,7 @@ def wait_for_workers_ready(
total_workers = data.get("total", 0)
if total_workers == expected_workers:
print(
logger.info(
f" All {expected_workers} workers connected after {elapsed}s"
)
return
@@ -161,14 +166,14 @@ def popen_launch_workers_and_router(
else:
router_port = find_free_port()
print(f"\n{'='*70}")
print(f"Launching gRPC cluster (separate workers + router)")
print(f"{'='*70}")
print(f" Model: {model}")
print(f" Router port: {router_port}")
print(f" Workers: {num_workers}")
print(f" TP size: {tp_size}")
print(f" Policy: {policy}")
logger.info(f"\n{'='*70}")
logger.info(f"Launching gRPC cluster (separate workers + router)")
logger.info(f"{'='*70}")
logger.info(f" Model: {model}")
logger.info(f" Router port: {router_port}")
logger.info(f" Workers: {num_workers}")
logger.info(f" TP size: {tp_size}")
logger.info(f" Policy: {policy}")
# Step 1: Launch workers with gRPC enabled
workers = []
@@ -179,9 +184,9 @@ def popen_launch_workers_and_router(
worker_url = f"grpc://127.0.0.1:{worker_port}"
worker_urls.append(worker_url)
print(f"\n[Worker {i+1}/{num_workers}]")
print(f" Port: {worker_port}")
print(f" URL: {worker_url}")
logger.info(f"\n[Worker {i+1}/{num_workers}]")
logger.info(f" Port: {worker_port}")
logger.info(f" URL: {worker_url}")
# Build worker command
worker_cmd = [
@@ -226,17 +231,19 @@ def popen_launch_workers_and_router(
)
workers.append(worker_proc)
print(f" PID: {worker_proc.pid}")
logger.info(f" PID: {worker_proc.pid}")
# Give workers a moment to start binding to ports
# The router will check worker health when it starts
print(f"\nWaiting for {num_workers} workers to initialize (20s)...")
logger.info(f"\nWaiting for {num_workers} workers to initialize (20s)...")
time.sleep(20)
# Quick check: make sure worker processes are still alive
for i, worker in enumerate(workers):
if worker.poll() is not None:
print(f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})")
logger.error(
f" ✗ Worker {i+1} died during startup (exit code: {worker.poll()})"
)
# Cleanup: kill all workers
for w in workers:
try:
@@ -245,12 +252,14 @@ def popen_launch_workers_and_router(
pass
raise RuntimeError(f"Worker {i+1} failed to start")
print(f"✓ All {num_workers} workers started (router will verify connectivity)")
logger.info(
f"✓ All {num_workers} workers started (router will verify connectivity)"
)
# Step 2: Launch router pointing to workers
print(f"\n[Router]")
print(f" Port: {router_port}")
print(f" Worker URLs: {', '.join(worker_urls)}")
logger.info(f"\n[Router]")
logger.info(f" Port: {router_port}")
logger.info(f" Worker URLs: {', '.join(worker_urls)}")
# Build router command
router_cmd = [
@@ -284,7 +293,7 @@ def popen_launch_workers_and_router(
router_cmd.extend(router_args)
if show_output:
print(f" Command: {' '.join(router_cmd)}")
logger.info(f" Command: {' '.join(router_cmd)}")
# Launch router
if show_output:
@@ -296,19 +305,19 @@ def popen_launch_workers_and_router(
stderr=subprocess.PIPE,
)
print(f" PID: {router_proc.pid}")
logger.info(f" PID: {router_proc.pid}")
# Wait for router to be ready
router_url = f"http://127.0.0.1:{router_port}"
print(f"\nWaiting for router to start at {router_url}...")
logger.info(f"\nWaiting for router to start at {router_url}...")
try:
wait_for_workers_ready(
router_url, expected_workers=num_workers, timeout=180, api_key=api_key
)
print(f"✓ Router ready at {router_url}")
logger.info(f"✓ Router ready at {router_url}")
except TimeoutError:
print(f"✗ Router failed to start")
logger.error(f"✗ Router failed to start")
# Cleanup: kill router and all workers
try:
router_proc.kill()
@@ -321,11 +330,11 @@ def popen_launch_workers_and_router(
pass
raise
print(f"\n{'='*70}")
print(f"✓ gRPC cluster ready!")
print(f" Router: {router_url}")
print(f" Workers: {len(workers)}")
print(f"{'='*70}\n")
logger.info(f"\n{'='*70}")
logger.info(f"✓ gRPC cluster ready!")
logger.info(f" Router: {router_url}")
logger.info(f" Workers: {len(workers)}")
logger.info(f"{'='*70}\n")
return {
"workers": workers,

View File

@@ -13,10 +13,7 @@ Run with:
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
from pathlib import Path

View File

@@ -8,8 +8,6 @@ Tests: required, auto, and specific function choices in both streaming and non-s
"""
import json
# CHANGE: Import router launcher instead of server launcher
import sys
import unittest
from pathlib import Path

View File

@@ -8,6 +8,7 @@ Extracted and adapted from:
- sglang.test.test_utils (constants and CustomTestCase)
"""
import logging
import os
import signal
import threading
@@ -17,6 +18,8 @@ from typing import Optional, Union
import psutil
logger = logging.getLogger(__name__)
try:
from transformers import (
AutoTokenizer,
@@ -204,8 +207,8 @@ def get_tokenizer(
raise RuntimeError(err_msg) from e
if not isinstance(tokenizer, PreTrainedTokenizerFast):
print(
f"Warning: Using a slow tokenizer. This might cause a performance "
logger.warning(
f"Using a slow tokenizer. This might cause a performance "
f"degradation. Consider using a fast tokenizer instead."
)
@@ -245,14 +248,10 @@ class CustomTestCase(unittest.TestCase):
return super(CustomTestCase, self)._callTestMethod(method)
except Exception as e:
if attempt < max_retry:
print(
logger.info(
f"Test failed on attempt {attempt + 1}/{max_retry + 1}, retrying..."
)
continue
else:
# Last attempt, re-raise the exception
raise
def setUp(self):
"""Print test method name at the start of each test."""
print(f"[Test Method] {self._testMethodName}", flush=True)

View File

@@ -3,8 +3,6 @@ python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLarge
"""
import os
# CHANGE: Import router launcher instead of server launcher
import sys
import time
import unittest
@@ -104,7 +102,6 @@ class TestLargeMaxNewTokens(CustomTestCase):
self.stderr.flush()
lines = open(STDERR_FILENAME).readlines()
for line in lines[pt:]:
print(line, end="", flush=True)
if f"#running-req: {num_requests}" in line:
all_requests_running = True
pt = -1

View File

@@ -12,7 +12,6 @@ Run with:
pytest py_test/e2e_grpc/e2e_grpc/validation/test_openai_server_ignore_eos.py -v
"""
# CHANGE: Import router launcher instead of server launcher
import sys
from pathlib import Path