Add retry for flaky tests in CI (#4755)

2025-03-26 07:53:12 +08:00
parent 52029bd1e3
commit 15ddd84322
112 changed files with 273 additions and 152 deletions
--- a/test/lang/test_anthropic_backend.py
+++ b/test/lang/test_anthropic_backend.py
@@ -3,9 +3,10 @@ import unittest

 from sglang import Anthropic, set_default_backend
 from sglang.test.test_programs import test_mt_bench, test_stream
+from sglang.test.test_utils import CustomTestCase


-class TestAnthropicBackend(unittest.TestCase):
+class TestAnthropicBackend(CustomTestCase):
    backend = None

    @classmethod
--- a/test/lang/test_bind_cache.py
+++ b/test/lang/test_bind_cache.py
@@ -1,10 +1,10 @@
 import unittest

 import sglang as sgl
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST
+from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase


-class TestBind(unittest.TestCase):
+class TestBind(CustomTestCase):
    backend = None

    @classmethod
--- a/test/lang/test_choices.py
+++ b/test/lang/test_choices.py
@@ -7,6 +7,7 @@ from sglang.lang.choices import (
    token_length_normalized,
    unconditional_likelihood_normalized,
 )
+from sglang.test.test_utils import CustomTestCase

 MOCK_CHOICES_INPUT_DATA = {
    "choices": [
@@ -51,7 +52,7 @@ MOCK_CHOICES_INPUT_DATA = {
 }


-class TestChoices(unittest.TestCase):
+class TestChoices(CustomTestCase):

    def test_token_length_normalized(self):
        """Confirm 'antidisestablishmentarianism' is selected due to high confidences for
--- a/test/lang/test_litellm_backend.py
+++ b/test/lang/test_litellm_backend.py
@@ -3,9 +3,10 @@ import unittest

 from sglang import LiteLLM, set_default_backend
 from sglang.test.test_programs import test_mt_bench, test_stream
+from sglang.test.test_utils import CustomTestCase


-class TestAnthropicBackend(unittest.TestCase):
+class TestAnthropicBackend(CustomTestCase):
    chat_backend = None

    @classmethod
--- a/test/lang/test_openai_backend.py
+++ b/test/lang/test_openai_backend.py
@@ -17,9 +17,10 @@ from sglang.test.test_programs import (
    test_stream,
    test_tool_use,
 )
+from sglang.test.test_utils import CustomTestCase


-class TestOpenAIBackend(unittest.TestCase):
+class TestOpenAIBackend(CustomTestCase):
    instruct_backend = None
    chat_backend = None
    chat_vision_backend = None
--- a/test/lang/test_srt_backend.py
+++ b/test/lang/test_srt_backend.py
@@ -22,10 +22,10 @@ from sglang.test.test_programs import (
    test_stream,
    test_tool_use,
 )
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST
+from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase


-class TestSRTBackend(unittest.TestCase):
+class TestSRTBackend(CustomTestCase):
    backend = None

    @classmethod
--- a/test/lang/test_tracing.py
+++ b/test/lang/test_tracing.py
@@ -3,9 +3,10 @@ import unittest
 import sglang as sgl
 from sglang.lang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
+from sglang.test.test_utils import CustomTestCase


-class TestTracing(unittest.TestCase):
+class TestTracing(CustomTestCase):
    def test_few_shot_qa(self):
        @sgl.function
        def few_shot_qa(s, question):
--- a/test/lang/test_vertexai_backend.py
+++ b/test/lang/test_vertexai_backend.py
@@ -10,9 +10,10 @@ from sglang.test.test_programs import (
    test_parallel_encoding,
    test_stream,
 )
+from sglang.test.test_utils import CustomTestCase


-class TestVertexAIBackend(unittest.TestCase):
+class TestVertexAIBackend(CustomTestCase):
    backend = None

    @classmethod
--- a/test/srt/models/lora/test_lora.py
+++ b/test/srt/models/lora/test_lora.py
@@ -18,6 +18,7 @@ import unittest
 import torch

 from sglang.test.runners import HFRunner, SRTRunner
+from sglang.test.test_utils import CustomTestCase

 LORA_SETS = [
    # {
@@ -70,7 +71,7 @@ What do you know about llamas?
 #     PROMPTS.append(sample[0]["content"][:2000])


-class TestLoRA(unittest.TestCase):
+class TestLoRA(CustomTestCase):

    def inference(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens):
        print("=================== testing inference =======================")
--- a/test/srt/models/lora/test_lora_backend.py
+++ b/test/srt/models/lora/test_lora_backend.py
@@ -21,7 +21,7 @@ import torch
 from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase

 from sglang.test.runners import HFRunner, SRTRunner
-from sglang.test.test_utils import calculate_rouge_l, is_in_ci
+from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci

 CI_LORA_MODELS = [
    LoRAModelCase(
@@ -67,7 +67,7 @@ PROMPTS = [
 ]


-class TestLoRABackend(unittest.TestCase):
+class TestLoRABackend(CustomTestCase):
    def run_backend(
        self,
        prompt: str,
--- a/test/srt/models/lora/test_lora_tp.py
+++ b/test/srt/models/lora/test_lora_tp.py
@@ -21,7 +21,7 @@ import torch
 from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase

 from sglang.test.runners import HFRunner, SRTRunner
-from sglang.test.test_utils import calculate_rouge_l, is_in_ci
+from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci

 CI_LORA_MODELS = [
    LoRAModelCase(
@@ -69,7 +69,7 @@ PROMPTS = [
 BACKEND = "triton"


-class TestLoRATP(unittest.TestCase):
+class TestLoRATP(CustomTestCase):
    def run_tp(
        self,
        prompt: str,
--- a/test/srt/models/lora/test_multi_lora_backend.py
+++ b/test/srt/models/lora/test_multi_lora_backend.py
@@ -19,7 +19,7 @@ from typing import List
 import torch
 from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase

-from sglang.test.test_utils import is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_ci

 MULTI_LORA_MODELS = [
    LoRAModelCase(
@@ -51,7 +51,7 @@ PROMPTS = [
 ]


-class TestMultiLoRABackend(unittest.TestCase):
+class TestMultiLoRABackend(CustomTestCase):
    def run_backend_batch(
        self,
        prompts: List[str],
--- a/test/srt/models/test_embedding_models.py
+++ b/test/srt/models/test_embedding_models.py
@@ -20,7 +20,7 @@ import torch
 from transformers import AutoConfig, AutoTokenizer

 from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
-from sglang.test.test_utils import get_similarities, is_in_ci
+from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci

 MODELS = [
    ("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
@@ -31,7 +31,7 @@ MODELS = [
 TORCH_DTYPES = [torch.float16]


-class TestEmbeddingModels(unittest.TestCase):
+class TestEmbeddingModels(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -33,7 +33,7 @@ from sglang.test.runners import (
    SRTRunner,
    check_close_model_outputs,
 )
-from sglang.test.test_utils import is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_ci


@dataclasses.dataclass
@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [
 TORCH_DTYPES = [torch.float16]


-class TestGenerationModels(unittest.TestCase):
+class TestGenerationModels(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/models/test_gme_qwen_models.py
+++ b/test/srt/models/test_gme_qwen_models.py
@@ -19,7 +19,7 @@ import unittest
 import torch

 from sglang.test.runners import HFRunner, SRTRunner
-from sglang.test.test_utils import get_similarities
+from sglang.test.test_utils import CustomTestCase, get_similarities

 TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
 IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
@@ -31,7 +31,7 @@ MODELS = [
 TORCH_DTYPES = [torch.float16]


-class TestQmeQwenModels(unittest.TestCase):
+class TestQmeQwenModels(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        mp.set_start_method("spawn", force=True)
--- a/test/srt/models/test_grok_models.py
+++ b/test/srt/models/test_grok_models.py
@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestGrok(unittest.TestCase):
+class TestGrok(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmzheng/grok-1"
--- a/test/srt/models/test_qwen_models.py
+++ b/test/srt/models/test_qwen_models.py
@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestQwen2(unittest.TestCase):
+class TestQwen2(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "Qwen/Qwen2-7B-Instruct"
@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.78)


-class TestQwen2FP8(unittest.TestCase):
+class TestQwen2FP8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"
--- a/test/srt/models/test_reward_models.py
+++ b/test/srt/models/test_reward_models.py
@@ -18,6 +18,7 @@ import unittest
 import torch

 from sglang.test.runners import HFRunner, SRTRunner
+from sglang.test.test_utils import CustomTestCase

 MODELS = [
    ("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
@@ -41,7 +42,7 @@ CONVS = [
 ]


-class TestRewardModels(unittest.TestCase):
+class TestRewardModels(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/test_abort.py
+++ b/test/srt/test_abort.py
@@ -5,10 +5,10 @@ from concurrent.futures import ThreadPoolExecutor

 import requests

-from sglang.test.test_utils import run_and_check_memory_leak
+from sglang.test.test_utils import CustomTestCase, run_and_check_memory_leak


-class TestAbort(unittest.TestCase):
+class TestAbort(CustomTestCase):
    def workload_func(self, base_url, model):
        def process_func():
            def run_one(_):
--- a/test/srt/test_awq.py
+++ b/test/srt/test_awq.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestAWQ(unittest.TestCase):
+class TestAWQ(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST
--- a/test/srt/test_bench_one_batch.py
+++ b/test/srt/test_bench_one_batch.py
@@ -3,6 +3,7 @@ import unittest
 from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
+    CustomTestCase,
    get_bool_env_var,
    is_in_ci,
    run_bench_one_batch,
@@ -10,7 +11,7 @@ from sglang.test.test_utils import (
 )


-class TestBenchOneBatch(unittest.TestCase):
+class TestBenchOneBatch(CustomTestCase):
    def test_bs1(self):
        output_throughput = run_bench_one_batch(
            DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"]
--- a/test/srt/test_bench_serving.py
+++ b/test/srt/test_bench_serving.py
@@ -6,13 +6,14 @@ from sglang.test.test_utils import (
    DEFAULT_FP8_MODEL_NAME_FOR_TEST,
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    run_bench_serving,
    write_github_step_summary,
 )


-class TestBenchServing(unittest.TestCase):
+class TestBenchServing(CustomTestCase):

    def test_offline_throughput_default(self):
        res = run_bench_serving(
--- a/test/srt/test_block_int8.py
+++ b/test/srt/test_block_int8.py
@@ -5,6 +5,7 @@ import torch

 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
+from sglang.test.test_utils import CustomTestCase


 # For test
@@ -121,7 +122,7 @@ def torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
    ).sum(dim=1)


-class TestW8A8BlockINT8FusedMoE(unittest.TestCase):
+class TestW8A8BlockINT8FusedMoE(CustomTestCase):
    DTYPES = [torch.half, torch.bfloat16]
    M = [1, 33, 64, 222]
    N = [128, 1024]
--- a/test/srt/test_cache_report.py
+++ b/test/srt/test_cache_report.py
@@ -8,11 +8,12 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestCacheReport(unittest.TestCase):
+class TestCacheReport(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_chunked_prefill.py
+++ b/test/srt/test_chunked_prefill.py
@@ -4,10 +4,10 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p

 import unittest

-from sglang.test.test_utils import run_mmlu_test, run_mulit_request_test
+from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test


-class TestChunkedPrefill(unittest.TestCase):
+class TestChunkedPrefill(CustomTestCase):
    def test_chunked_prefill(self):
        run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)

--- a/test/srt/test_create_kvindices.py
+++ b/test/srt/test_create_kvindices.py
@@ -5,9 +5,10 @@ import numpy as np
 import torch

 from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
+from sglang.test.test_utils import CustomTestCase


-class TestCreateKvIndices(unittest.TestCase):
+class TestCreateKvIndices(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        if not torch.cuda.is_available():
--- a/test/srt/test_custom_allreduce.py
+++ b/test/srt/test_custom_allreduce.py
@@ -17,6 +17,7 @@ from sglang.srt.distributed.parallel_state import (
    graph_capture,
    initialize_model_parallel,
 )
+from sglang.test.test_utils import CustomTestCase


 def get_open_port() -> int:
@@ -54,7 +55,7 @@ def multi_process_parallel(
    ray.shutdown()


-class TestCustomAllReduce(unittest.TestCase):
+class TestCustomAllReduce(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        random.seed(42)
--- a/test/srt/test_data_parallelism.py
+++ b/test/srt/test_data_parallelism.py
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestDataParallelism(unittest.TestCase):
+class TestDataParallelism(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_double_sparsity.py
+++ b/test/srt/test_double_sparsity.py
@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestDoubleSparsity(unittest.TestCase):
+class TestDoubleSparsity(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_dp_attention.py
+++ b/test/srt/test_dp_attention.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestDPAttentionDP2TP2(unittest.TestCase):
+class TestDPAttentionDP2TP2(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
--- a/test/srt/test_eagle_infer.py
+++ b/test/srt/test_eagle_infer.py
@@ -24,6 +24,7 @@ from sglang.test.test_utils import (
    DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
    run_logprob_check,
 )
@@ -33,7 +34,7 @@ prefill_tolerance = 5e-2
 decode_tolerance: float = 5e-2


-class TestEAGLEEngine(unittest.TestCase):
+class TestEAGLEEngine(CustomTestCase):
    BASE_CONFIG = {
        "model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
        "speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
@@ -179,7 +180,7 @@ class TestEAGLE3Engine(TestEAGLEEngine):
    NUM_CONFIGS = 1


-class TestEAGLEServer(unittest.TestCase):
+class TestEAGLEServer(CustomTestCase):
    PROMPTS = [
        "[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]"
        '[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]',
--- a/test/srt/test_ebnf_constrained.py
+++ b/test/srt/test_ebnf_constrained.py
@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )

@@ -42,7 +43,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
    )


-class TestEBNFConstrained(unittest.TestCase):
+class TestEBNFConstrained(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        setup_class(cls, "xgrammar", disable_overlap=False)
--- a/test/srt/test_embedding_openai_server.py
+++ b/test/srt/test_embedding_openai_server.py
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestOpenAIServer(unittest.TestCase):
+class TestOpenAIServer(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "intfloat/e5-mistral-7b-instruct"
--- a/test/srt/test_eval_accuracy_large.py
+++ b/test/srt/test_eval_accuracy_large.py
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
    write_github_step_summary,
 )


-class TestEvalAccuracyLarge(unittest.TestCase):
+class TestEvalAccuracyLarge(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_eval_fp8_accuracy.py
+++ b/test/srt/test_eval_fp8_accuracy.py
@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestEvalFP8Accuracy(unittest.TestCase):
+class TestEvalFP8Accuracy(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
@@ -44,7 +45,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
        self.assertGreaterEqual(metrics["score"], 0.61)


-class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
+class TestEvalFP8DynamicQuantAccuracy(CustomTestCase):

    def _run_test(self, model, other_args, expected_score):
        base_url = DEFAULT_URL_FOR_TEST
@@ -109,7 +110,7 @@ class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
        )


-class TestEvalFP8ModelOptQuantAccuracy(unittest.TestCase):
+class TestEvalFP8ModelOptQuantAccuracy(CustomTestCase):

    def _run_test(self, model, other_args, expected_score):
        base_url = DEFAULT_URL_FOR_TEST
--- a/test/srt/test_expert_distribution.py
+++ b/test/srt/test_expert_distribution.py
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestExpertDistribution(unittest.TestCase):
+class TestExpertDistribution(CustomTestCase):
    def setUp(self):
        # Clean up any existing expert distribution files before each test
        for f in glob.glob("expert_distribution_*.csv"):
--- a/test/srt/test_fim_completion.py
+++ b/test/srt/test_fim_completion.py
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestFimCompletion(unittest.TestCase):
+class TestFimCompletion(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "deepseek-ai/deepseek-coder-1.3b-base"
--- a/test/srt/test_fp8_kernel.py
+++ b/test/srt/test_fp8_kernel.py
@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.fp8_kernel import (
    per_token_group_quant_fp8,
    w8a8_block_fp8_matmul,
 )
+from sglang.test.test_utils import CustomTestCase


-class TestFP8Base(unittest.TestCase):
+class TestFP8Base(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.M = 256
--- a/test/srt/test_fp8_kvcache.py
+++ b/test/srt/test_fp8_kvcache.py
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestFp8KvcacheBase(unittest.TestCase):
+class TestFp8KvcacheBase(CustomTestCase):
    model_config = None

    @classmethod
--- a/test/srt/test_function_calling.py
+++ b/test/srt/test_function_calling.py
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestOpenAIServerFunctionCalling(unittest.TestCase):
+class TestOpenAIServerFunctionCalling(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_fused_moe.py
+++ b/test/srt/test_fused_moe.py
@@ -7,9 +7,10 @@ from vllm.model_executor.layers.fused_moe import fused_moe as fused_moe_vllm

 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
+from sglang.test.test_utils import CustomTestCase


-class TestFusedMOE(unittest.TestCase):
+class TestFusedMOE(CustomTestCase):
    NUM_EXPERTS = [8, 64]
    TOP_KS = [2, 6]

--- a/test/srt/test_get_weights_by_name.py
+++ b/test/srt/test_get_weights_by_name.py
@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
 )
@@ -26,7 +27,7 @@ def _process_return(ret):
    return np.array(ret)


-class TestGetWeightsByName(unittest.TestCase):
+class TestGetWeightsByName(CustomTestCase):

    def init_hf_model(self, model_name, tie_word_embeddings):
        self.hf_model = AutoModelForCausalLM.from_pretrained(
--- a/test/srt/test_gguf.py
+++ b/test/srt/test_gguf.py
@@ -3,9 +3,10 @@ import unittest
 from huggingface_hub import hf_hub_download

 import sglang as sgl
+from sglang.test.test_utils import CustomTestCase


-class TestGGUF(unittest.TestCase):
+class TestGGUF(CustomTestCase):
    def test_models(self):
        prompt = "Today is a sunny day and I like"
        sampling_params = {"temperature": 0, "max_new_tokens": 8}
--- a/test/srt/test_gptqmodel_dynamic.py
+++ b/test/srt/test_gptqmodel_dynamic.py
@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )

@@ -102,7 +103,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
 # GPTQ with Dynamic Per/Module Quantization Control
 # Leverages GPTQModel (pypi) to produce the `dynamic` models
 # Test GPTQ fallback kernel that is not Marlin
-class TestGPTQModelDynamic(unittest.TestCase):
+class TestGPTQModelDynamic(CustomTestCase):
    MODEL_PATH = (
        "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
    )
@@ -157,7 +158,7 @@ class TestGPTQModelDynamic(unittest.TestCase):
 # GPTQ with Dynamic Per/Module Quantization Control
 # Leverages GPTQModel (pypi) to produce the `dynamic` models
 # Test Marlin kernel
-class TestGPTQModelDynamicWithMarlin(unittest.TestCase):
+class TestGPTQModelDynamicWithMarlin(CustomTestCase):
    MODEL_PATH = (
        "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
    )
--- a/test/srt/test_health_check.py
+++ b/test/srt/test_health_check.py
@@ -3,11 +3,12 @@ import unittest
 from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestHealthCheck(unittest.TestCase):
+class TestHealthCheck(CustomTestCase):
    def test_health_check(self):
        """Test that metrics endpoint returns data when enabled"""
        with self.assertRaises(TimeoutError):
--- a/test/srt/test_hicache.py
+++ b/test/srt/test_hicache.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestPageSize(unittest.TestCase):
+class TestPageSize(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_hicache_mla.py
+++ b/test/srt/test_hicache_mla.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestHierarchicalMLA(unittest.TestCase):
+class TestHierarchicalMLA(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
--- a/test/srt/test_hidden_states.py
+++ b/test/srt/test_hidden_states.py
@@ -4,10 +4,10 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer

 import sglang as sgl
-from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase


-class TestHiddenState(unittest.TestCase):
+class TestHiddenState(CustomTestCase):
    def test_return_hidden_states(self):
        prompts = ["Today is", "Today is a sunny day and I like"]
        model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_input_embeddings.py
+++ b/test/srt/test_input_embeddings.py
@@ -11,11 +11,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestInputEmbeds(unittest.TestCase):
+class TestInputEmbeds(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_int8_kernel.py
+++ b/test/srt/test_int8_kernel.py
@@ -6,6 +6,7 @@ import torch
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
 from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8
+from sglang.test.test_utils import CustomTestCase


 def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):
@@ -71,7 +72,7 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk):
    ).sum(dim=1)


-class TestW8A8Int8FusedMoE(unittest.TestCase):
+class TestW8A8Int8FusedMoE(CustomTestCase):
    DTYPES = [torch.half, torch.bfloat16]
    M = [1, 33]
    N = [128, 1024]
--- a/test/srt/test_json_constrained.py
+++ b/test/srt/test_json_constrained.py
@@ -16,6 +16,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )

@@ -50,7 +51,7 @@ def setup_class(cls, backend: str):
    )


-class TestJSONConstrainedOutlinesBackend(unittest.TestCase):
+class TestJSONConstrainedOutlinesBackend(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        setup_class(cls, backend="outlines")
--- a/test/srt/test_large_max_new_tokens.py
+++ b/test/srt/test_large_max_new_tokens.py
@@ -17,11 +17,12 @@ from sglang.test.test_utils import (
    DEFAULT_URL_FOR_TEST,
    STDERR_FILENAME,
    STDOUT_FILENAME,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestLargeMaxNewTokens(unittest.TestCase):
+class TestLargeMaxNewTokens(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_matched_stop.py
+++ b/test/srt/test_matched_stop.py
@@ -7,6 +7,7 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )

@@ -18,7 +19,7 @@ The story should span multiple events, challenges, and character developments ov
 """


-class TestMatchedStop(unittest.TestCase):
+class TestMatchedStop(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_metrics.py
+++ b/test/srt/test_metrics.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestEnableMetrics(unittest.TestCase):
+class TestEnableMetrics(CustomTestCase):
    def test_metrics_enabled(self):
        """Test that metrics endpoint returns data when enabled"""
        process = popen_launch_server(
--- a/test/srt/test_mla.py
+++ b/test/srt/test_mla.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestMLA(unittest.TestCase):
+class TestMLA(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
--- a/test/srt/test_mla_deepseek_v3.py
+++ b/test/srt/test_mla_deepseek_v3.py
@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestMLADeepseekV3(unittest.TestCase):
+class TestMLADeepseekV3(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmsys/sglang-ci-dsv3-test"
@@ -48,7 +49,7 @@ class TestMLADeepseekV3(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.62)


-class TestDeepseekV3MTP(unittest.TestCase):
+class TestDeepseekV3MTP(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmsys/sglang-ci-dsv3-test"
--- a/test/srt/test_mla_flashinfer.py
+++ b/test/srt/test_mla_flashinfer.py
@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestFlashinferMLA(unittest.TestCase):
+class TestFlashinferMLA(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmsys/sglang-ci-dsv3-test"
@@ -55,7 +56,7 @@ class TestFlashinferMLA(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.62)


-class TestFlashinferMLANoRagged(unittest.TestCase):
+class TestFlashinferMLANoRagged(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmsys/sglang-ci-dsv3-test"
@@ -99,7 +100,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.62)


-class TestFlashinferMLAMTP(unittest.TestCase):
+class TestFlashinferMLAMTP(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmsys/sglang-ci-dsv3-test"
--- a/test/srt/test_mla_fp8.py
+++ b/test/srt/test_mla_fp8.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestMLA(unittest.TestCase):
+class TestMLA(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST
--- a/test/srt/test_mla_int8_deepseek_v3.py
+++ b/test/srt/test_mla_int8_deepseek_v3.py
@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestMLADeepseekV3ChannelInt8(unittest.TestCase):
+class TestMLADeepseekV3ChannelInt8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
@@ -48,7 +49,7 @@ class TestMLADeepseekV3ChannelInt8(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.62)


-class TestDeepseekV3MTPChannelInt8(unittest.TestCase):
+class TestDeepseekV3MTPChannelInt8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
@@ -109,7 +110,7 @@ class TestDeepseekV3MTPChannelInt8(unittest.TestCase):
        self.assertGreater(avg_spec_accept_length, 2.5)


-class TestMLADeepseekV3BlockInt8(unittest.TestCase):
+class TestMLADeepseekV3BlockInt8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
@@ -144,7 +145,7 @@ class TestMLADeepseekV3BlockInt8(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.62)


-class TestDeepseekV3MTPBlockInt8(unittest.TestCase):
+class TestDeepseekV3MTPBlockInt8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
--- a/test/srt/test_mla_tp.py
+++ b/test/srt/test_mla_tp.py
@@ -8,11 +8,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestDeepseekTP2(unittest.TestCase):
+class TestDeepseekTP2(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmsys/sglang-ci-dsv3-test"
--- a/test/srt/test_modelopt_fp8kvcache.py
+++ b/test/srt/test_modelopt_fp8kvcache.py
@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.modelopt_quant import (
    ModelOptFp8Config,
    ModelOptFp8KVCacheMethod,
 )
+from sglang.test.test_utils import CustomTestCase


-class TestModelOptFp8KVCacheMethod(unittest.TestCase):
+class TestModelOptFp8KVCacheMethod(CustomTestCase):
    def test_kv_cache_method_initialization(self):
        """Test that ModelOptFp8KVCacheMethod can be instantiated and
        inherits from BaseKVCacheMethod."""
--- a/test/srt/test_models_from_modelscope.py
+++ b/test/srt/test_models_from_modelscope.py
@@ -5,9 +5,10 @@ import unittest
 from unittest import mock

 from sglang.srt.utils import prepare_model_and_tokenizer
+from sglang.test.test_utils import CustomTestCase


-class TestDownloadFromModelScope(unittest.TestCase):
+class TestDownloadFromModelScope(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/test_moe_deepep.py
+++ b/test/srt/test_moe_deepep.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestDeepEPMoE(unittest.TestCase):
+class TestDeepEPMoE(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
--- a/test/srt/test_moe_ep.py
+++ b/test/srt/test_moe_ep.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestEpMoE(unittest.TestCase):
+class TestEpMoE(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
@@ -59,7 +60,7 @@ class TestEpMoE(unittest.TestCase):
        self.assertGreater(metrics["score"], 0.8)


-class TestEpMoEFP8(unittest.TestCase):
+class TestEpMoEFP8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
--- a/test/srt/test_moe_eval_accuracy_large.py
+++ b/test/srt/test_moe_eval_accuracy_large.py
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
    write_github_step_summary,
 )


-class TestMoEEvalAccuracyLarge(unittest.TestCase):
+class TestMoEEvalAccuracyLarge(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
--- a/test/srt/test_nightly_gsm8k_eval.py
+++ b/test/srt/test_nightly_gsm8k_eval.py
@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
    write_github_step_summary,
@@ -129,7 +130,7 @@ def check_model_scores(results):
        raise AssertionError("\n".join(failed_models))


-class TestNightlyGsm8KEval(unittest.TestCase):
+class TestNightlyGsm8KEval(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model_groups = [
--- a/test/srt/test_nightly_human_eval.py
+++ b/test/srt/test_nightly_human_eval.py
@@ -14,11 +14,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
 )


-class TestNightlyHumanEval(unittest.TestCase):
+class TestNightlyHumanEval(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        if is_in_ci():
--- a/test/srt/test_nightly_math_eval.py
+++ b/test/srt/test_nightly_math_eval.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestEvalAccuracyLarge(unittest.TestCase):
+class TestEvalAccuracyLarge(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_no_chunked_prefill.py
+++ b/test/srt/test_no_chunked_prefill.py
@@ -2,12 +2,13 @@ import unittest

 from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
+    CustomTestCase,
    run_bench_serving,
    run_mmlu_test,
 )


-class TestNoChunkedPrefill(unittest.TestCase):
+class TestNoChunkedPrefill(CustomTestCase):

    def test_no_chunked_prefill(self):
        run_mmlu_test(
--- a/test/srt/test_no_overlap_scheduler.py
+++ b/test/srt/test_no_overlap_scheduler.py
@@ -6,10 +6,10 @@ python3 test_overlap_schedule.py

 import unittest

-from sglang.test.test_utils import run_mmlu_test
+from sglang.test.test_utils import CustomTestCase, run_mmlu_test


-class TestOverlapSchedule(unittest.TestCase):
+class TestOverlapSchedule(CustomTestCase):
    def test_no_radix_attention_chunked_prefill(self):
        run_mmlu_test(
            disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True
--- a/test/srt/test_openai_server.py
+++ b/test/srt/test_openai_server.py
@@ -18,11 +18,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestOpenAIServer(unittest.TestCase):
+class TestOpenAIServer(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -541,7 +542,7 @@ The SmartHome Mini is a compact smart home assistant available in black or white
 #    EBNF Test Class: TestOpenAIServerEBNF
 #    Launches the server with xgrammar, has only EBNF tests
 # -------------------------------------------------------------------------
-class TestOpenAIServerEBNF(unittest.TestCase):
+class TestOpenAIServerEBNF(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -624,7 +625,7 @@ class TestOpenAIServerEBNF(unittest.TestCase):
        )


-class TestOpenAIEmbedding(unittest.TestCase):
+class TestOpenAIEmbedding(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
--- a/test/srt/test_page_size.py
+++ b/test/srt/test_page_size.py
@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestPageSize(unittest.TestCase):
+class TestPageSize(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1"
--- a/test/srt/test_penalty.py
+++ b/test/srt/test_penalty.py
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestPenalty(unittest.TestCase):
+class TestPenalty(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/test_pytorch_sampling_backend.py
+++ b/test/srt/test_pytorch_sampling_backend.py
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestPyTorchSamplingBackend(unittest.TestCase):
+class TestPyTorchSamplingBackend(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_radix_attention.py
+++ b/test/srt/test_radix_attention.py
@@ -8,6 +8,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    kill_process_tree,
    popen_launch_server,
 )
@@ -59,7 +60,7 @@ def run_test(base_url, nodes):
    assert res.status_code == 200


-class TestRadixCacheFCFS(unittest.TestCase):
+class TestRadixCacheFCFS(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_reasoning_content.py
+++ b/test/srt/test_reasoning_content.py
@@ -20,11 +20,12 @@ from sglang.test.test_utils import (
    DEFAULT_REASONING_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestReasoningContentAPI(unittest.TestCase):
+class TestReasoningContentAPI(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
@@ -181,7 +182,7 @@ class TestReasoningContentAPI(unittest.TestCase):
        assert len(response.choices[0].message.content) > 0


-class TestReasoningContentWithoutParser(unittest.TestCase):
+class TestReasoningContentWithoutParser(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
--- a/test/srt/test_regex_constrained.py
+++ b/test/srt/test_regex_constrained.py
@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )

@@ -41,7 +42,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
    )


-class TestRegexConstrained(unittest.TestCase):
+class TestRegexConstrained(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        setup_class(cls, "xgrammar", disable_overlap=False)
--- a/test/srt/test_release_memory_occupation.py
+++ b/test/srt/test_release_memory_occupation.py
@@ -5,13 +5,13 @@ import torch
 from transformers import AutoModelForCausalLM

 import sglang as sgl
-from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase

 # (temporarily) set to true to observe memory usage in nvidia-smi more clearly
 _DEBUG_EXTRA = True


-class TestReleaseMemoryOccupation(unittest.TestCase):
+class TestReleaseMemoryOccupation(CustomTestCase):
    def test_release_and_resume_occupation(self):
        prompt = "Today is a sunny day and I like"
        sampling_params = {"temperature": 0, "max_new_tokens": 8}
--- a/test/srt/test_request_length_validation.py
+++ b/test/srt/test_request_length_validation.py
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestRequestLengthValidation(unittest.TestCase):
+class TestRequestLengthValidation(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.base_url = DEFAULT_URL_FOR_TEST
--- a/test/srt/test_retract_decode.py
+++ b/test/srt/test_retract_decode.py
@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestRetractDecode(unittest.TestCase):
+class TestRetractDecode(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        os.environ["SGLANG_TEST_RETRACT"] = "1"
@@ -40,7 +41,7 @@ class TestRetractDecode(unittest.TestCase):
        self.assertGreaterEqual(metrics["score"], 0.65)


-class TestRetractDecodeChunkCache(unittest.TestCase):
+class TestRetractDecodeChunkCache(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        os.environ["SGLANG_TEST_RETRACT"] = "1"
--- a/test/srt/test_sagemaker_server.py
+++ b/test/srt/test_sagemaker_server.py
@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestSageMakerServer(unittest.TestCase):
+class TestSageMakerServer(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_schedule_policy.py
+++ b/test/srt/test_schedule_policy.py
@@ -8,9 +8,10 @@ from sglang.srt.managers.schedule_policy import (
 )
 from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
 from sglang.srt.sampling.sampling_params import SamplingParams
+from sglang.test.test_utils import CustomTestCase


-class TestSchedulePolicy(unittest.TestCase):
+class TestSchedulePolicy(CustomTestCase):

    def setUp(self):
        self.tree_cache = RadixCache(None, None, False)
--- a/test/srt/test_server_args.py
+++ b/test/srt/test_server_args.py
@@ -2,9 +2,10 @@ import json
 import unittest

 from sglang.srt.server_args import prepare_server_args
+from sglang.test.test_utils import CustomTestCase


-class TestPrepareServerArgs(unittest.TestCase):
+class TestPrepareServerArgs(CustomTestCase):
    def test_prepare_server_args(self):
        server_args = prepare_server_args(
            [
--- a/test/srt/test_session_control.py
+++ b/test/srt/test_session_control.py
@@ -19,6 +19,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )

@@ -27,7 +28,7 @@ def remove_prefix(text: str, prefix: str) -> str:
    return text[len(prefix) :] if text.startswith(prefix) else text


-class TestSessionControl(unittest.TestCase):
+class TestSessionControl(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -560,7 +561,7 @@ class TestSessionControl(unittest.TestCase):
        )


-class TestSessionControlVision(unittest.TestCase):
+class TestSessionControlVision(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmms-lab/llava-onevision-qwen2-7b-ov"
--- a/test/srt/test_skip_tokenizer_init.py
+++ b/test/srt/test_skip_tokenizer_init.py
@@ -19,11 +19,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_VLM_MODEL_NAME,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestSkipTokenizerInit(unittest.TestCase):
+class TestSkipTokenizerInit(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_srt_endpoint.py
+++ b/test/srt/test_srt_endpoint.py
@@ -20,12 +20,13 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
    run_logprob_check,
 )


-class TestSRTEndpoint(unittest.TestCase):
+class TestSRTEndpoint(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
--- a/test/srt/test_srt_engine.py
+++ b/test/srt/test_srt_engine.py
@@ -18,10 +18,11 @@ from sglang.test.few_shot_gsm8k_engine import run_eval
 from sglang.test.test_utils import (
    DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+    CustomTestCase,
 )


-class TestSRTEngine(unittest.TestCase):
+class TestSRTEngine(CustomTestCase):

    def test_1_engine_runtime_consistency(self):
        prompt = "Today is a sunny day and I like"
--- a/test/srt/test_srt_engine_with_quant_args.py
+++ b/test/srt/test_srt_engine_with_quant_args.py
@@ -1,10 +1,10 @@
 import unittest

 import sglang as sgl
-from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase


-class TestSRTEngineWithQuantArgs(unittest.TestCase):
+class TestSRTEngineWithQuantArgs(CustomTestCase):

    def test_1_quantization_args(self):

--- a/test/srt/test_torch_compile.py
+++ b/test/srt/test_torch_compile.py
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestTorchCompile(unittest.TestCase):
+class TestTorchCompile(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_torch_compile_moe.py
+++ b/test/srt/test_torch_compile_moe.py
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestTorchCompileMoe(unittest.TestCase):
+class TestTorchCompileMoe(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST
--- a/test/srt/test_torch_native_attention_backend.py
+++ b/test/srt/test_torch_native_attention_backend.py
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
    run_bench_one_batch,
 )


-class TestTorchNativeAttnBackend(unittest.TestCase):
+class TestTorchNativeAttnBackend(CustomTestCase):
    def test_latency(self):
        output_throughput = run_bench_one_batch(
            DEFAULT_MODEL_NAME_FOR_TEST,
--- a/test/srt/test_torch_tp.py
+++ b/test/srt/test_torch_tp.py
@@ -1,9 +1,9 @@
 import unittest

-from sglang.test.test_utils import is_in_ci, run_bench_one_batch
+from sglang.test.test_utils import CustomTestCase, is_in_ci, run_bench_one_batch


-class TestTorchTP(unittest.TestCase):
+class TestTorchTP(CustomTestCase):
    def test_torch_native_llama(self):
        output_throughput = run_bench_one_batch(
            "meta-llama/Meta-Llama-3-8B",
--- a/test/srt/test_torchao.py
+++ b/test/srt/test_torchao.py
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestTorchAO(unittest.TestCase):
+class TestTorchAO(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
--- a/test/srt/test_triton_attention_backend.py
+++ b/test/srt/test_triton_attention_backend.py
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
    run_bench_one_batch,
 )


-class TestTritonAttnBackend(unittest.TestCase):
+class TestTritonAttnBackend(CustomTestCase):
    def test_latency(self):
        output_throughput = run_bench_one_batch(
            DEFAULT_MODEL_NAME_FOR_TEST,
--- a/test/srt/test_triton_attention_kernels.py
+++ b/test/srt/test_triton_attention_kernels.py
@@ -15,9 +15,10 @@ from sglang.srt.layers.attention.triton_ops.extend_attention import (
 from sglang.srt.layers.attention.triton_ops.prefill_attention import (
    context_attention_fwd,
 )
+from sglang.test.test_utils import CustomTestCase


-class TestTritonAttention(unittest.TestCase):
+class TestTritonAttention(CustomTestCase):

    def _set_all_seeds(self, seed):
        """Set all random seeds for reproducibility."""
--- a/test/srt/test_triton_attention_rocm_mla.py
+++ b/test/srt/test_triton_attention_rocm_mla.py
@@ -10,9 +10,10 @@ from sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope import (
    decode_attention_fwd_grouped_rope,
 )
 from sglang.srt.layers.rotary_embedding import DeepseekScalingRotaryEmbedding
+from sglang.test.test_utils import CustomTestCase


-class TestTritonAttentionMLA(unittest.TestCase):
+class TestTritonAttentionMLA(CustomTestCase):

    def _set_all_seeds(self, seed):
        """Set all random seeds for reproducibility."""
--- a/test/srt/test_update_weights_from_disk.py
+++ b/test/srt/test_update_weights_from_disk.py
@@ -10,6 +10,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
 )
@@ -18,7 +19,7 @@ from sglang.test.test_utils import (
 ###############################################################################
 # Engine Mode Tests (Single-configuration)
 ###############################################################################
-class TestEngineUpdateWeightsFromDisk(unittest.TestCase):
+class TestEngineUpdateWeightsFromDisk(CustomTestCase):
    def setUp(self):
        self.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
        # Initialize the engine in offline (direct) mode.
@@ -70,7 +71,7 @@ class TestEngineUpdateWeightsFromDisk(unittest.TestCase):
 ###############################################################################
 # HTTP Server Mode Tests (Single-configuration)
 ###############################################################################
-class TestServerUpdateWeightsFromDisk(unittest.TestCase):
+class TestServerUpdateWeightsFromDisk(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -159,7 +160,7 @@ class TestServerUpdateWeightsFromDisk(unittest.TestCase):
 # - In a non-CI environment: test both Engine and Server modes, and enumerate all combinations
 #   with tp and dp ranging from 1 to 2.
 ###############################################################################
-class TestUpdateWeightsFromDiskParameterized(unittest.TestCase):
+class TestUpdateWeightsFromDiskParameterized(CustomTestCase):
    def run_common_test(self, mode, tp, dp):
        """
        Common test procedure for update_weights_from_disk.
--- a/test/srt/test_update_weights_from_distributed.py
+++ b/test/srt/test_update_weights_from_distributed.py
@@ -33,6 +33,7 @@ from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    is_in_ci,
    popen_launch_server,
 )
@@ -523,7 +524,7 @@ def test_update_weights_from_distributed(
    torch.cuda.empty_cache()


-class TestUpdateWeightsFromDistributed(unittest.TestCase):
+class TestUpdateWeightsFromDistributed(CustomTestCase):

    def test_update_weights_from_distributed(self):

--- a/test/srt/test_update_weights_from_tensor.py
+++ b/test/srt/test_update_weights_from_tensor.py
@@ -5,7 +5,7 @@ import unittest
 import torch

 import sglang as sgl
-from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase


 def test_update_weights_from_tensor(tp_size):
@@ -40,7 +40,7 @@ def test_update_weights_from_tensor(tp_size):
    ), f"Memory leak detected: {memory_after - memory_before} bytes"


-class TestUpdateWeightsFromTensor(unittest.TestCase):
+class TestUpdateWeightsFromTensor(CustomTestCase):
    def test_update_weights_from_tensor(self):
        tp_sizes = [1, 2]
        for tp_size in tp_sizes:
--- a/test/srt/test_verl_engine.py
+++ b/test/srt/test_verl_engine.py
@@ -27,7 +27,7 @@ from sglang.test.runners import (
    check_close_model_outputs,
    get_dtype_str,
 )
-from sglang.test.test_utils import is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_ci

 _MAX_NEW_TOKENS = 8
 _PROMPTS = ["1+1=2, 1+2=3, 1+3=4, 1+4=5, 1+5=", "1*1=1, 1*2=2, 1*3=3, 1*4=4, 1*5="]
@@ -73,7 +73,7 @@ ALL_OTHER_MODELS = [
 ]


-class TestVerlEngine(unittest.TestCase):
+class TestVerlEngine(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        multiprocessing.set_start_method("spawn")
--- a/Show More
+++ b/Show More