From 26fc32d1682383e9df422c21cbc3609359f4d834 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Mon, 28 Apr 2025 19:27:37 -0700 Subject: [PATCH] [CI] tune the test order to warmup the server (#5860) --- python/sglang/srt/layers/quantization/deep_gemm.py | 2 +- test/srt/test_full_deepseek_v3.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/layers/quantization/deep_gemm.py b/python/sglang/srt/layers/quantization/deep_gemm.py index 08ba0b9f9..6fa7a6dd6 100644 --- a/python/sglang/srt/layers/quantization/deep_gemm.py +++ b/python/sglang/srt/layers/quantization/deep_gemm.py @@ -293,7 +293,7 @@ def _maybe_compile_deep_gemm_one_type_all( logger.info( f"Try DeepGEMM JIT Compiling for " f"<{kernel_helper.name}> N={n}, K={k}, num_groups={num_groups} with all Ms." - f"{' It only takes a litte time(Typically 1 sec) if you have run `sglang.compile_deep_gemm`. ' if not _IN_PRECOMPILE_STAGE else ''}" + f"{' It only takes a litte time (typically 1 sec) if you have run `python3 -m sglang.compile_deep_gemm`. 
' if not _IN_PRECOMPILE_STAGE else ''}" ) # NOTE(alcanderian): get_num_sms should be change when 2-batch-overlap is introduced diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index 02a93f1c5..7b29787b1 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -35,7 +35,9 @@ class TestDeepseekV3(CustomTestCase): def tearDownClass(cls): kill_process_tree(cls.process.pid) - def test_gsm8k(self): + def test_a_gsm8k( + self, + ): # Prefix the name with "a_" to make this test run first (alphabetically) to warm up the server args = SimpleNamespace( num_shots=8, data_path=None, @@ -100,7 +102,9 @@ class TestDeepseekV3MTP(CustomTestCase): def tearDownClass(cls): kill_process_tree(cls.process.pid) - def test_gsm8k(self): + def test_a_gsm8k( + self, + ): # Prefix the name with "a_" to make this test run first (alphabetically) to warm up the server requests.get(self.base_url + "/flush_cache") args = SimpleNamespace(