Clean up metrics code (#1972)

This commit is contained in:
Lianmin Zheng
2024-11-09 15:43:20 -08:00
committed by GitHub
parent 549e8b8366
commit 9c939a3d8b
16 changed files with 101 additions and 107 deletions

View File

@@ -16,6 +16,7 @@ suites = {
"test_eval_accuracy_mini.py",
"test_json_constrained.py",
"test_large_max_new_tokens.py",
"test_metrics.py",
"test_openai_server.py",
"test_overlap_schedule.py",
"test_pytorch_sampling_backend.py",

View File

@@ -1,7 +1,5 @@
import subprocess
import unittest
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST,

View File

@@ -6,7 +6,7 @@ import requests
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
@@ -15,7 +15,7 @@ from sglang.test.test_utils import (
class TestCacheReport(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.min_cached = 5
cls.process = popen_launch_server(

View File

@@ -3,6 +3,7 @@ python3 -m unittest test_large_max_new_tokens.TestLargeMaxNewTokens.test_chat_co
"""
import os
import time
import unittest
from concurrent.futures import ThreadPoolExecutor
@@ -11,7 +12,7 @@ import openai
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
@@ -21,7 +22,7 @@ from sglang.test.test_utils import (
class TestLargeMaxNewTokens(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
@@ -33,12 +34,19 @@ class TestLargeMaxNewTokens(unittest.TestCase):
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=("--max-total-token", "1024", "--context-len", "8192"),
other_args=(
"--max-total-token",
"1024",
"--context-len",
"8192",
"--decode-log-interval",
"2",
),
env={"SGLANG_CLIP_MAX_NEW_TOKENS_ESTIMATION": "256", **os.environ},
return_stdout_stderr=(cls.stdout, cls.stderr),
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST)
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):
@@ -75,6 +83,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
# Ensure that they are running concurrently
pt = 0
while pt >= 0:
time.sleep(5)
lines = open("stderr.txt").readlines()
for line in lines[pt:]:
print(line, end="", flush=True)

View File

@@ -1,31 +1,24 @@
import unittest
from types import SimpleNamespace
import requests
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
TEST_MODEL = (
DEFAULT_MODEL_NAME_FOR_TEST # I used "google/gemma-2-2b-it" for testing locally
)
class TestEnableMetrics(unittest.TestCase):
def test_metrics_enabled(self):
"""Test that metrics endpoint returns data when enabled"""
# Launch server with metrics enabled
process = popen_launch_server(
model=TEST_MODEL,
base_url=DEFAULT_URL_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
enable_metrics=True,
other_args=["--enable-metrics"],
)
try:
@@ -38,6 +31,8 @@ class TestEnableMetrics(unittest.TestCase):
self.assertEqual(metrics_response.status_code, 200)
metrics_content = metrics_response.text
print(f"{metrics_content=}")
# Verify essential metrics are present
essential_metrics = [
"sglang:prompt_tokens_total",
@@ -53,7 +48,7 @@ class TestEnableMetrics(unittest.TestCase):
self.assertIn(metric, metrics_content, f"Missing metric: {metric}")
# Verify model name label is present and correct
expected_model_name = TEST_MODEL
expected_model_name = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
self.assertIn(f'model_name="{expected_model_name}"', metrics_content)
# Verify metrics have values (not empty)
self.assertIn("_sum{", metrics_content)
@@ -63,22 +58,6 @@ class TestEnableMetrics(unittest.TestCase):
finally:
kill_child_process(process.pid, include_self=True)
def test_metrics_disabled(self):
"""Test that metrics endpoint returns 404 when disabled"""
# Launch server with metrics disabled
process = popen_launch_server(
model=TEST_MODEL,
base_url=DEFAULT_URL_FOR_TEST,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
enable_metrics=False,
)
try:
response = requests.get(f"{DEFAULT_URL_FOR_TEST}/health_generate")
self.assertEqual(response.status_code, 200)
# Verify metrics endpoint is not available
metrics_response = requests.get(f"{DEFAULT_URL_FOR_TEST}/metrics")
self.assertEqual(metrics_response.status_code, 404)
finally:
kill_child_process(process.pid, include_self=True)
if __name__ == "__main__":
unittest.main()

View File

@@ -13,7 +13,7 @@ import openai
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
@@ -23,7 +23,7 @@ from sglang.test.test_utils import (
class TestOpenAIServer(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
cls.process = popen_launch_server(
@@ -33,7 +33,7 @@ class TestOpenAIServer(unittest.TestCase):
api_key=cls.api_key,
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST)
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):

View File

@@ -5,7 +5,7 @@ import unittest
import requests
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
kill_child_process,
@@ -62,7 +62,7 @@ def run_test(base_url, nodes):
class TestRadixCacheFCFS(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
@@ -90,7 +90,7 @@ class TestRadixCacheFCFS(unittest.TestCase):
class TestRadixCacheLPM(TestRadixCacheFCFS):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
@@ -110,7 +110,7 @@ class TestRadixCacheLPM(TestRadixCacheFCFS):
class TestRadixCacheOverlapLPM(TestRadixCacheFCFS):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,

View File

@@ -9,7 +9,7 @@ import requests
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
@@ -19,7 +19,7 @@ from sglang.test.test_utils import (
class TestSkipTokenizerInit(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,

View File

@@ -10,7 +10,7 @@ import requests
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
@@ -20,7 +20,7 @@ from sglang.test.test_utils import (
class TestSRTEndpoint(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH

View File

@@ -11,14 +11,17 @@ from types import SimpleNamespace
import sglang as sgl
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.test.few_shot_gsm8k_engine import run_eval
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
)
class TestSRTEngine(unittest.TestCase):
def test_1_engine_runtime_consistency(self):
prompt = "Today is a sunny day and I like"
model_path = DEFAULT_MODEL_NAME_FOR_TEST
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
sampling_params = {"temperature": 0, "max_new_tokens": 8}
@@ -40,7 +43,7 @@ class TestSRTEngine(unittest.TestCase):
def test_2_engine_multiple_generate(self):
# just to ensure there is no issue running multiple generate calls
prompt = "Today is a sunny day and I like"
model_path = DEFAULT_MODEL_NAME_FOR_TEST
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
sampling_params = {"temperature": 0, "max_new_tokens": 8}
@@ -66,7 +69,7 @@ class TestSRTEngine(unittest.TestCase):
# Create an LLM.
llm = sgl.Engine(
model_path=DEFAULT_MODEL_NAME_FOR_TEST,
model_path=DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
log_level="error",
)
@@ -110,7 +113,7 @@ class TestSRTEngine(unittest.TestCase):
def test_5_prompt_input_ids_consistency(self):
prompt = "The capital of UK is"
model_path = DEFAULT_MODEL_NAME_FOR_TEST
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
engine = sgl.Engine(model_path=model_path, random_seed=42, log_level="error")
sampling_params = {"temperature": 0, "max_new_tokens": 8}
out1 = engine.generate(prompt, sampling_params)["text"]

View File

@@ -5,7 +5,7 @@ import requests
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
@@ -15,7 +15,7 @@ from sglang.test.test_utils import (
class TestUpdateWeights(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
@@ -64,7 +64,7 @@ class TestUpdateWeights(unittest.TestCase):
origin_response = self.run_decode()
# update weights
new_model_path = "meta-llama/Meta-Llama-3.1-8B"
new_model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST.replace("-Instruct", "")
ret = self.run_update_weights(new_model_path)
assert ret["success"]
@@ -92,7 +92,7 @@ class TestUpdateWeights(unittest.TestCase):
origin_response = self.run_decode()
# update weights
new_model_path = "meta-llama/Meta-Llama-3.1-8B-1"
new_model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST.replace("-Instruct", "wrong")
ret = self.run_update_weights(new_model_path)
assert not ret["success"]