From 26fc32d1682383e9df422c21cbc3609359f4d834 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Mon, 28 Apr 2025 19:27:37 -0700
Subject: [PATCH] [CI] tune the test order to warmup the server (#5860)

---
 python/sglang/srt/layers/quantization/deep_gemm.py | 2 +-
 test/srt/test_full_deepseek_v3.py                  | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/layers/quantization/deep_gemm.py b/python/sglang/srt/layers/quantization/deep_gemm.py
index 08ba0b9f9..6fa7a6dd6 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm.py
@@ -293,7 +293,7 @@ def _maybe_compile_deep_gemm_one_type_all(
         logger.info(
             f"Try DeepGEMM JIT Compiling for "
             f"<{kernel_helper.name}> N={n}, K={k}, num_groups={num_groups} with all Ms."
-            f"{' It only takes a litte time(Typically 1 sec) if you have run `sglang.compile_deep_gemm`. ' if not _IN_PRECOMPILE_STAGE else ''}"
+            f"{' It only takes a litte time (typically 1 sec) if you have run `python3 -m sglang.compile_deep_gemm`. ' if not _IN_PRECOMPILE_STAGE else ''}"
         )
 
         # NOTE(alcanderian): get_num_sms should be change when 2-batch-overlap is introduced
diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py
index 02a93f1c5..7b29787b1 100644
--- a/test/srt/test_full_deepseek_v3.py
+++ b/test/srt/test_full_deepseek_v3.py
@@ -35,7 +35,9 @@ class TestDeepseekV3(CustomTestCase):
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)
 
-    def test_gsm8k(self):
+    def test_a_gsm8k(
+        self,
+    ):  # The "a_" after "test_" makes this test sort first (unittest runs methods alphabetically), so it warms up the server
         args = SimpleNamespace(
             num_shots=8,
             data_path=None,
@@ -100,7 +102,9 @@ class TestDeepseekV3MTP(CustomTestCase):
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)
 
-    def test_gsm8k(self):
+    def test_a_gsm8k(
+        self,
+    ):  # The "a_" after "test_" makes this test sort first (unittest runs methods alphabetically), so it warms up the server
         requests.get(self.base_url + "/flush_cache")
 
         args = SimpleNamespace(