Files
sglang/rust/py_test/test_launch_server.py
2024-11-25 16:02:03 -08:00

100 lines
2.3 KiB
Python

import subprocess
import time
import unittest
from types import SimpleNamespace
import requests
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
)
def popen_launch_router(
    model: str,
    base_url: str,
    dp_size: int,
    timeout: float,
):
    """
    Launch the router server process and wait until it reports healthy.

    Starts ``python3 -m sglang_router.launch_server`` as a subprocess and
    polls ``{base_url}/health`` until it answers with HTTP 200.

    Args:
        model: Model path/name, passed as ``--model-path``.
        base_url: Server base URL with scheme, host and port
            (e.g. ``http://127.0.0.1:8000``). Host and port are extracted
            from it and passed as ``--host`` / ``--port``.
        dp_size: Data parallel size, passed as ``--dp``.
        timeout: Maximum number of seconds to wait for the health check.

    Returns:
        The ``subprocess.Popen`` handle of the launched process.

    Raises:
        ValueError: If ``base_url`` does not contain an explicit host and port.
        TimeoutError: If the server does not become healthy within
            ``timeout`` seconds, or if the process exits before it becomes
            healthy. In both cases the launched process is cleaned up first.
    """
    # Function-scope import keeps this block self-contained. urlsplit copes
    # with IPv6 literals and trailing paths, which a plain split(":") does not.
    from urllib.parse import urlsplit

    parsed = urlsplit(base_url)
    host, port = parsed.hostname, parsed.port
    if not host or port is None:
        raise ValueError(
            f"base_url must contain an explicit host and port, got {base_url!r}"
        )

    command = [
        "python3",
        "-m",
        "sglang_router.launch_server",
        "--model-path",
        model,
        "--host",
        host,
        "--port",
        str(port),
        "--dp",
        str(dp_size),  # argv entries must be strings
    ]

    # No env/stdout/stderr overrides: the child inherits the current
    # environment and writes to the test runner's own stdout/stderr.
    process = subprocess.Popen(command)

    health_url = f"{base_url.rstrip('/')}/health"
    poll_interval = 10.0  # seconds between health probes
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout

    try:
        with requests.Session() as session:
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    raise TimeoutError(
                        "Server failed to start within the timeout period."
                    )

                # Fail fast if the server died instead of waiting out the
                # whole timeout. Raised as TimeoutError so callers that
                # already handle a failed launch keep working.
                return_code = process.poll()
                if return_code is not None:
                    raise TimeoutError(
                        f"Server process exited with code {return_code} "
                        "before becoming healthy."
                    )

                try:
                    # Bound each probe so one hung connection cannot block
                    # past the overall deadline.
                    response = session.get(
                        health_url, timeout=min(remaining, poll_interval)
                    )
                    if response.status_code == 200:
                        return process
                except requests.RequestException:
                    # Server not accepting connections yet; retry below.
                    pass

                # Never sleep past the deadline.
                time.sleep(
                    max(0.0, min(poll_interval, deadline - time.monotonic()))
                )
    except BaseException:
        # Do not leak the server on timeout, Ctrl-C or any other failure.
        # Uses the same kill_child_process call as the test teardown.
        if process.poll() is None:
            kill_child_process(process.pid, include_self=True)
        raise
class TestEvalAccuracyMini(unittest.TestCase):
    """Small MMLU accuracy check against a server started via the router."""

    @classmethod
    def setUpClass(cls):
        """Start one router-launched server shared by every test in the class."""
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        launch_kwargs = {
            "model": cls.model,
            "base_url": cls.base_url,
            "dp_size": 1,
            "timeout": DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        }
        cls.process = popen_launch_router(**launch_kwargs)

    @classmethod
    def tearDownClass(cls):
        """Stop the server process that setUpClass started."""
        server_pid = cls.process.pid
        kill_child_process(server_pid, include_self=True)

    def test_mmlu(self):
        # Run the "mmlu" eval on 64 examples and require a score of at
        # least 0.65.
        eval_settings = {
            "base_url": self.base_url,
            "model": self.model,
            "eval_name": "mmlu",
            "num_examples": 64,
            "num_threads": 32,
            "temperature": 0.1,
        }
        results = run_eval(SimpleNamespace(**eval_settings))
        score = results["score"]
        self.assertGreaterEqual(score, 0.65)
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()