Add retry for flaky tests in CI (#4755)

2025-03-26 07:53:12 +08:00
parent 52029bd1e3
commit 15ddd84322
112 changed files with 273 additions and 152 deletions
--- a/test/srt/models/lora/test_lora.py
+++ b/test/srt/models/lora/test_lora.py
@@ -18,6 +18,7 @@ import unittest
 import torch

 from sglang.test.runners import HFRunner, SRTRunner
+from sglang.test.test_utils import CustomTestCase

 LORA_SETS = [
    # {
@@ -70,7 +71,7 @@ What do you know about llamas?
 #     PROMPTS.append(sample[0]["content"][:2000])


-class TestLoRA(unittest.TestCase):
+class TestLoRA(CustomTestCase):

    def inference(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens):
        print("=================== testing inference =======================")
--- a/test/srt/models/lora/test_lora_backend.py
+++ b/test/srt/models/lora/test_lora_backend.py
@@ -21,7 +21,7 @@ import torch
 from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase

 from sglang.test.runners import HFRunner, SRTRunner
-from sglang.test.test_utils import calculate_rouge_l, is_in_ci
+from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci

 CI_LORA_MODELS = [
    LoRAModelCase(
@@ -67,7 +67,7 @@ PROMPTS = [
 ]


-class TestLoRABackend(unittest.TestCase):
+class TestLoRABackend(CustomTestCase):
    def run_backend(
        self,
        prompt: str,
--- a/test/srt/models/lora/test_lora_tp.py
+++ b/test/srt/models/lora/test_lora_tp.py
@@ -21,7 +21,7 @@ import torch
 from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase

 from sglang.test.runners import HFRunner, SRTRunner
-from sglang.test.test_utils import calculate_rouge_l, is_in_ci
+from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci

 CI_LORA_MODELS = [
    LoRAModelCase(
@@ -69,7 +69,7 @@ PROMPTS = [
 BACKEND = "triton"


-class TestLoRATP(unittest.TestCase):
+class TestLoRATP(CustomTestCase):
    def run_tp(
        self,
        prompt: str,
--- a/test/srt/models/lora/test_multi_lora_backend.py
+++ b/test/srt/models/lora/test_multi_lora_backend.py
@@ -19,7 +19,7 @@ from typing import List
 import torch
 from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase

-from sglang.test.test_utils import is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_ci

 MULTI_LORA_MODELS = [
    LoRAModelCase(
@@ -51,7 +51,7 @@ PROMPTS = [
 ]


-class TestMultiLoRABackend(unittest.TestCase):
+class TestMultiLoRABackend(CustomTestCase):
    def run_backend_batch(
        self,
        prompts: List[str],
--- a/test/srt/models/test_embedding_models.py
+++ b/test/srt/models/test_embedding_models.py
@@ -20,7 +20,7 @@ import torch
 from transformers import AutoConfig, AutoTokenizer

 from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
-from sglang.test.test_utils import get_similarities, is_in_ci
+from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci

 MODELS = [
    ("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
@@ -31,7 +31,7 @@ MODELS = [
 TORCH_DTYPES = [torch.float16]


-class TestEmbeddingModels(unittest.TestCase):
+class TestEmbeddingModels(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -33,7 +33,7 @@ from sglang.test.runners import (
    SRTRunner,
    check_close_model_outputs,
 )
-from sglang.test.test_utils import is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_ci


@dataclasses.dataclass
@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [
 TORCH_DTYPES = [torch.float16]


-class TestGenerationModels(unittest.TestCase):
+class TestGenerationModels(CustomTestCase):

    @classmethod
    def setUpClass(cls):
--- a/test/srt/models/test_gme_qwen_models.py
+++ b/test/srt/models/test_gme_qwen_models.py
@@ -19,7 +19,7 @@ import unittest
 import torch

 from sglang.test.runners import HFRunner, SRTRunner
-from sglang.test.test_utils import get_similarities
+from sglang.test.test_utils import CustomTestCase, get_similarities

 TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
 IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
@@ -31,7 +31,7 @@ MODELS = [
 TORCH_DTYPES = [torch.float16]


-class TestQmeQwenModels(unittest.TestCase):
+class TestQmeQwenModels(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        mp.set_start_method("spawn", force=True)
--- a/test/srt/models/test_grok_models.py
+++ b/test/srt/models/test_grok_models.py
@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestGrok(unittest.TestCase):
+class TestGrok(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "lmzheng/grok-1"
--- a/test/srt/models/test_qwen_models.py
+++ b/test/srt/models/test_qwen_models.py
@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
    popen_launch_server,
 )


-class TestQwen2(unittest.TestCase):
+class TestQwen2(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "Qwen/Qwen2-7B-Instruct"
@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase):
        self.assertGreater(metrics["accuracy"], 0.78)


-class TestQwen2FP8(unittest.TestCase):
+class TestQwen2FP8(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"
--- a/test/srt/models/test_reward_models.py
+++ b/test/srt/models/test_reward_models.py
@@ -18,6 +18,7 @@ import unittest
 import torch

 from sglang.test.runners import HFRunner, SRTRunner
+from sglang.test.test_utils import CustomTestCase

 MODELS = [
    ("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
@@ -41,7 +42,7 @@ CONVS = [
 ]


-class TestRewardModels(unittest.TestCase):
+class TestRewardModels(CustomTestCase):

    @classmethod
    def setUpClass(cls):