Clean up metrics code (#1972)

This commit is contained in:
Lianmin Zheng
2024-11-09 15:43:20 -08:00
committed by GitHub
parent 549e8b8366
commit 9c939a3d8b
16 changed files with 101 additions and 107 deletions

View File

@@ -3,6 +3,7 @@ python3 -m unittest test_large_max_new_tokens.TestLargeMaxNewTokens.test_chat_co
"""
import os
import time
import unittest
from concurrent.futures import ThreadPoolExecutor
@@ -11,7 +12,7 @@ import openai
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
@@ -21,7 +22,7 @@ from sglang.test.test_utils import (
class TestLargeMaxNewTokens(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
@@ -33,12 +34,19 @@ class TestLargeMaxNewTokens(unittest.TestCase):
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=("--max-total-token", "1024", "--context-len", "8192"),
other_args=(
"--max-total-token",
"1024",
"--context-len",
"8192",
"--decode-log-interval",
"2",
),
env={"SGLANG_CLIP_MAX_NEW_TOKENS_ESTIMATION": "256", **os.environ},
return_stdout_stderr=(cls.stdout, cls.stderr),
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST)
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):
@@ -75,6 +83,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
# Ensure that they are running concurrently
pt = 0
while pt >= 0:
time.sleep(5)
lines = open("stderr.txt").readlines()
for line in lines[pt:]:
print(line, end="", flush=True)