From 254fd130e27363de8d56364e5a13fad0188fb7a2 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Mon, 25 Nov 2024 04:58:16 -0800 Subject: [PATCH] [CI] Split test cases in CI for better load balancing (#2180) --- scripts/ci_install_dependency.sh | 4 +++ test/srt/run_suite.py | 3 +- test/srt/test_chunked_prefill.py | 22 +------------- test/srt/test_no_chunked_prefill.py | 29 +++++++++++++++++++ ...eduler.py => test_no_overlap_scheduler.py} | 0 test/srt/test_srt_endpoint.py | 2 +- 6 files changed, 37 insertions(+), 23 deletions(-) create mode 100644 test/srt/test_no_chunked_prefill.py rename test/srt/{test_non_overlap_scheduler.py => test_no_overlap_scheduler.py} (100%) diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh index fd0299db0..080a909c5 100644 --- a/scripts/ci_install_dependency.sh +++ b/scripts/ci_install_dependency.sh @@ -6,3 +6,7 @@ pip install --upgrade pip pip install -e "python[all]" pip install transformers==4.45.2 sentence_transformers accelerate peft pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall +# for compiling eagle kernels +pip install cutex +# for compiling xgrammar kernels +pip install cuda-python nvidia-cuda-nvrtc-cu12 diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index b857aec51..9bd7b9810 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -17,7 +17,8 @@ suites = { "test_json_constrained.py", "test_large_max_new_tokens.py", "test_metrics.py", - "test_non_overlap_scheduler.py", + "test_no_chunked_prefill.py", + "test_no_overlap_scheduler.py", "test_openai_server.py", "test_pytorch_sampling_backend.py", "test_radix_attention.py", diff --git a/test/srt/test_chunked_prefill.py b/test/srt/test_chunked_prefill.py index 0930603fe..cafd99931 100644 --- a/test/srt/test_chunked_prefill.py +++ b/test/srt/test_chunked_prefill.py @@ -4,12 +4,7 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p import unittest -from 
sglang.test.test_utils import ( - DEFAULT_MODEL_NAME_FOR_TEST, - run_bench_serving, - run_mmlu_test, - run_mulit_request_test, -) +from sglang.test.test_utils import run_mmlu_test, run_mulit_request_test class TestChunkedPrefill(unittest.TestCase): @@ -25,21 +20,6 @@ class TestChunkedPrefill(unittest.TestCase): def test_mixed_chunked_prefill_without_radix_cache(self): run_mmlu_test(disable_radix_cache=True, enable_mixed_chunk=True) - def test_no_chunked_prefill(self): - run_mmlu_test( - disable_radix_cache=False, enable_mixed_chunk=False, chunked_prefill_size=-1 - ) - - def test_no_chunked_prefill_without_radix_cache(self): - res = run_bench_serving( - model=DEFAULT_MODEL_NAME_FOR_TEST, - num_prompts=10, - request_rate=float("inf"), - other_server_args=["--disable-radix-cache", "--chunked-prefill-size", "-1"], - ) - - assert res["completed"] == 10 - def test_mixed_chunked_prefill_multi_requests(self): run_mulit_request_test( enable_mixed_chunk=True, diff --git a/test/srt/test_no_chunked_prefill.py b/test/srt/test_no_chunked_prefill.py new file mode 100644 index 000000000..8252c9ae0 --- /dev/null +++ b/test/srt/test_no_chunked_prefill.py @@ -0,0 +1,29 @@ +import unittest + +from sglang.test.test_utils import ( + DEFAULT_MODEL_NAME_FOR_TEST, + run_bench_serving, + run_mmlu_test, +) + + +class TestNoChunkedPrefill(unittest.TestCase): + + def test_no_chunked_prefill(self): + run_mmlu_test( + disable_radix_cache=False, enable_mixed_chunk=False, chunked_prefill_size=-1 + ) + + def test_no_chunked_prefill_without_radix_cache(self): + res = run_bench_serving( + model=DEFAULT_MODEL_NAME_FOR_TEST, + num_prompts=10, + request_rate=float("inf"), + other_server_args=["--disable-radix-cache", "--chunked-prefill-size", "-1"], + ) + + assert res["completed"] == 10 + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_non_overlap_scheduler.py b/test/srt/test_no_overlap_scheduler.py similarity index 100% rename from test/srt/test_non_overlap_scheduler.py 
rename to test/srt/test_no_overlap_scheduler.py diff --git a/test/srt/test_srt_endpoint.py b/test/srt/test_srt_endpoint.py index e5018a02c..9f2e15641 100644 --- a/test/srt/test_srt_endpoint.py +++ b/test/srt/test_srt_endpoint.py @@ -211,7 +211,7 @@ class TestSRTEndpoint(unittest.TestCase): diff = np.abs(output_logprobs - output_logprobs_score) max_diff = np.max(diff) - self.assertLess(max_diff, 0.2) + self.assertLess(max_diff, 0.25) def test_get_server_info(self): response = requests.get(self.base_url + "/get_server_info")