Re-organize CI tests (#1052)

This commit is contained in:
Lianmin Zheng
2024-08-12 03:39:01 -07:00
committed by GitHub
parent 0c1c72a0b4
commit c877292cc1
9 changed files with 148 additions and 22 deletions

View File

@@ -12,6 +12,7 @@ suites = {
"test_openai_server.py",
"test_skip_tokenizer_init.py",
"test_torch_compile.py",
"test_triton_attn_backend.py",
"test_vision_openai_server.py",
"test_large_max_new_tokens.py",
"models/test_generation_models.py",

View File

@@ -0,0 +1,68 @@
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
    """Accuracy evals (MMLU, HumanEval, MGSM-EN) against a server launched
    with a small chunked-prefill size, checking that chunked prefill does
    not degrade model quality below the expected score floors."""

    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
        # Use the shared test-endpoint constant rather than a hard-coded
        # port; DEFAULT_URL_FOR_TEST was imported above but never used.
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=["--log-level-http", "warning", "--chunked-prefill-size", "256"],
        )

    @classmethod
    def tearDownClass(cls):
        # popen_launch_server spawns worker subprocesses; kill the whole tree.
        kill_child_process(cls.process.pid)

    def _run_eval_and_check(self, eval_name, num_examples, min_score):
        """Run one named eval against the live server and assert a score floor.

        Args:
            eval_name: eval suite identifier understood by run_eval.
            num_examples: number of examples to sample (None = full set).
            min_score: minimum acceptable metrics["score"].
        """
        args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name=eval_name,
            num_examples=num_examples,
            num_threads=1024,
        )
        metrics = run_eval(args)
        # Include the full metrics dict in the failure message for debugging.
        assert metrics["score"] >= min_score, f"{metrics}"

    def test_mmlu(self):
        self._run_eval_and_check("mmlu", 3000, 0.71)

    def test_human_eval(self):
        self._run_eval_and_check("humaneval", None, 0.65)

    def test_mgsm_en(self):
        self._run_eval_and_check("mgsm_en", None, 0.85)
# Allow running this test file directly (outside the suite runner).
if __name__ == "__main__":
    unittest.main()

View File

@@ -3,6 +3,7 @@ import unittest
from types import SimpleNamespace
from sglang.bench_serving import run_benchmark
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
@@ -60,9 +61,9 @@ class TestServingThroughput(unittest.TestCase):
def test_default(self):
res = self.run_test(
disable_radix_cache=False,
disable_flashinfer=False,
chunked_prefill_size=-1,
disable_radix_cache=ServerArgs.disable_radix_cache,
disable_flashinfer=ServerArgs.disable_flashinfer,
chunked_prefill_size=ServerArgs.chunked_prefill_size,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
@@ -72,21 +73,25 @@ class TestServingThroughput(unittest.TestCase):
def test_default_without_radix_cache(self):
res = self.run_test(
disable_radix_cache=True,
disable_flashinfer=False,
chunked_prefill_size=-1,
disable_flashinfer=ServerArgs.disable_flashinfer,
chunked_prefill_size=ServerArgs.chunked_prefill_size,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
assert res["output_throughput"] >= 1450
def test_default_without_flashinfer(self):
self.run_test(
disable_radix_cache=False,
disable_flashinfer=True,
chunked_prefill_size=-1,
def test_default_with_chunked_prefill(self):
res = self.run_test(
disable_radix_cache=ServerArgs.disable_radix_cache,
disable_flashinfer=ServerArgs.disable_flashinfer,
chunked_prefill_size=8192,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
assert res["output_throughput"] >= 1400
def test_all_cases(self):
for disable_radix_cache in [False, True]:
for disable_flashinfer in [False, True]:

View File

@@ -0,0 +1,41 @@
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
class TestTritonAttnBackend(unittest.TestCase):
    """Smoke-check MMLU accuracy with the Triton attention backend,
    selected by launching the server with --disable-flashinfer."""

    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.process = popen_launch_server(
            cls.model, cls.base_url, timeout=300, other_args=["--disable-flashinfer"]
        )

    @classmethod
    def tearDownClass(cls):
        # popen_launch_server spawns worker subprocesses; kill the whole tree.
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=32,
            num_threads=32,
        )
        metrics = run_eval(args)
        # Attach the metrics to the failure message, matching the other
        # eval-accuracy tests in this commit, so failures are debuggable.
        assert metrics["score"] >= 0.6, f"{metrics}"
# Allow running this test file directly (outside the suite runner).
if __name__ == "__main__":
    unittest.main()