Add retry for flaky tests in CI (#4755)

This commit is contained in:
fzyzcjy
2025-03-26 07:53:12 +08:00
committed by GitHub
parent 52029bd1e3
commit 15ddd84322
112 changed files with 273 additions and 152 deletions

View File

@@ -33,7 +33,7 @@ jobs:
pip install -r docs/requirements.txt pip install -r docs/requirements.txt
apt-get update apt-get update
apt-get install -y pandoc apt-get install -y pandoc
apt-get update && apt-get install -y parallel apt-get update && apt-get install -y parallel retry
- name: Setup Jupyter Kernel - name: Setup Jupyter Kernel
run: | run: |

View File

@@ -23,7 +23,8 @@ compile:
parallel -0 -j3 --halt soon,fail=1 ' \ parallel -0 -j3 --halt soon,fail=1 ' \
NB_NAME=$$(basename {}); \ NB_NAME=$$(basename {}); \
START_TIME=$$(date +%s); \ START_TIME=$$(date +%s); \
jupyter nbconvert --to notebook --execute --inplace "{}" \ retry --delay=0 --times=3 -- \
jupyter nbconvert --to notebook --execute --inplace "{}" \
--ExecutePreprocessor.timeout=600 \ --ExecutePreprocessor.timeout=600 \
--ExecutePreprocessor.kernel_name=python3; \ --ExecutePreprocessor.kernel_name=python3; \
RET_CODE=$$?; \ RET_CODE=$$?; \

View File

@@ -4,9 +4,10 @@ import unittest
import torch import torch
from sglang.srt.layers.activation import GeluAndMul from sglang.srt.layers.activation import GeluAndMul
from sglang.test.test_utils import CustomTestCase
class TestGeluAndMul(unittest.TestCase): class TestGeluAndMul(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
NUM_TOKENS = [7, 83, 2048] NUM_TOKENS = [7, 83, 2048]
D = [512, 4096, 5120, 13824] D = [512, 4096, 5120, 13824]

View File

@@ -11,6 +11,7 @@ from sglang.srt.layers.quantization.fp8_kernel import (
static_quant_fp8, static_quant_fp8,
w8a8_block_fp8_matmul, w8a8_block_fp8_matmul,
) )
from sglang.test.test_utils import CustomTestCase
_is_cuda = torch.cuda.is_available() and torch.version.cuda _is_cuda = torch.cuda.is_available() and torch.version.cuda
@@ -44,7 +45,7 @@ def native_per_token_group_quant_fp8(
return x_q, x_s return x_q, x_s
class TestPerTokenGroupQuantFP8(unittest.TestCase): class TestPerTokenGroupQuantFP8(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16, torch.float32] DTYPES = [torch.half, torch.bfloat16, torch.float32]
NUM_TOKENS = [7, 83, 2048] NUM_TOKENS = [7, 83, 2048]
D = [512, 4096, 5120, 13824] D = [512, 4096, 5120, 13824]
@@ -111,7 +112,7 @@ def native_static_quant_fp8(x, x_s, dtype=torch.float8_e4m3fn):
return x_q, x_s return x_q, x_s
class TestStaticQuantFP8(unittest.TestCase): class TestStaticQuantFP8(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16, torch.float32] DTYPES = [torch.half, torch.bfloat16, torch.float32]
NUM_TOKENS = [7, 83, 2048] NUM_TOKENS = [7, 83, 2048]
D = [512, 4096, 5120, 13824] D = [512, 4096, 5120, 13824]
@@ -210,7 +211,7 @@ def native_w8a8_block_fp8_matmul(A, B, As, Bs, block_size, output_dtype=torch.fl
return C return C
class TestW8A8BlockFP8Matmul(unittest.TestCase): class TestW8A8BlockFP8Matmul(CustomTestCase):
if not _is_cuda: if not _is_cuda:
OUT_DTYPES = [torch.float32, torch.half, torch.bfloat16] OUT_DTYPES = [torch.float32, torch.half, torch.bfloat16]
@@ -331,7 +332,7 @@ def torch_w8a8_block_fp8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
).sum(dim=1) ).sum(dim=1)
class TestW8A8BlockFP8FusedMoE(unittest.TestCase): class TestW8A8BlockFP8FusedMoE(CustomTestCase):
DTYPES = [torch.float32, torch.half, torch.bfloat16] DTYPES = [torch.float32, torch.half, torch.bfloat16]
M = [1, 33, 64, 222, 1024 * 128] M = [1, 33, 64, 222, 1024 * 128]
N = [128, 1024, 2048] N = [128, 1024, 2048]

View File

@@ -13,6 +13,7 @@ from sglang.srt.layers.moe.ep_moe.kernels import (
silu_and_mul_triton_kernel, silu_and_mul_triton_kernel,
) )
from sglang.srt.layers.moe.topk import select_experts from sglang.srt.layers.moe.topk import select_experts
from sglang.test.test_utils import CustomTestCase
# For test # For test
@@ -232,7 +233,7 @@ def block_dequant(
return x_dq_block return x_dq_block
class TestW8A8BlockFP8EPMoE(unittest.TestCase): class TestW8A8BlockFP8EPMoE(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
M = [1, 222, 1024, 2048] M = [1, 222, 1024, 2048]
N = [128, 1024, 2048] N = [128, 1024, 2048]

View File

@@ -3,9 +3,10 @@ import unittest
import torch import torch
from sglang.srt.utils import DynamicGradMode from sglang.srt.utils import DynamicGradMode
from sglang.test.test_utils import CustomTestCase
class TestDynamicGradMode(unittest.TestCase): class TestDynamicGradMode(CustomTestCase):
def test_inference(self): def test_inference(self):
# Test inference_mode # Test inference_mode
DynamicGradMode.set_inference_mode(True) DynamicGradMode.set_inference_mode(True)

View File

@@ -4,9 +4,10 @@ import unittest
import torch import torch
from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm
from sglang.test.test_utils import CustomTestCase
class TestRMSNorm(unittest.TestCase): class TestRMSNorm(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
NUM_TOKENS = [7, 83, 4096] NUM_TOKENS = [7, 83, 4096]
HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199] HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199]
@@ -56,7 +57,7 @@ class TestRMSNorm(unittest.TestCase):
self._run_rms_norm_test(*params) self._run_rms_norm_test(*params)
class TestGemmaRMSNorm(unittest.TestCase): class TestGemmaRMSNorm(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
NUM_TOKENS = [7, 83, 4096] NUM_TOKENS = [7, 83, 4096]
HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199] HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199]

View File

@@ -8,6 +8,7 @@ import random
import subprocess import subprocess
import threading import threading
import time import time
import traceback
import unittest import unittest
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass from dataclasses import dataclass
@@ -998,3 +999,30 @@ def run_logprob_check(self: unittest.TestCase, arg: Tuple):
rank += 1 rank += 1
else: else:
raise raise
class CustomTestCase(unittest.TestCase):
    """``unittest.TestCase`` variant that re-runs a failing test method.

    Only the call to the test method itself is wrapped; the retry budget is
    obtained from ``_get_max_retry()`` each time a test method is invoked.
    """

    def _callTestMethod(self, method):
        # Resolve the stock implementation up front: zero-argument super()
        # is not usable inside the argument-less inner callable below.
        base_call = super()._callTestMethod

        def run_once():
            return base_call(method)

        _retry_execution(run_once, max_retry=_get_max_retry())
def _get_max_retry():
    """Return how many extra attempts a failing test method is granted.

    The value comes from the ``SGLANG_TEST_MAX_RETRY`` environment variable.
    When the variable is unset the fallback is 2 if ``is_in_ci()`` is truthy
    and 0 otherwise. ``int()`` raises ``ValueError`` if the variable holds a
    non-integer string.
    """
    fallback = "2" if is_in_ci() else "0"
    configured = os.environ.get("SGLANG_TEST_MAX_RETRY", fallback)
    return int(configured)
def _retry_execution(fn, max_retry: int):
    """Call ``fn``; if it raises, call it again up to ``max_retry`` more times.

    Args:
        fn: Zero-argument callable to execute. Its return value is ignored.
        max_retry: Number of additional attempts allowed after the first
            failure. Zero or a negative value means a single attempt.

    Raises:
        unittest.SkipTest: Re-raised immediately; a skip is a deliberate
            outcome, not a flaky failure, so it is never retried.
        Exception: Whatever ``fn`` raises on the last permitted attempt.
    """
    # Clamp so that a negative budget cannot loop forever.
    retries_left = max(max_retry, 0)

    while retries_left > 0:
        try:
            fn()
        except unittest.SkipTest:
            raise
        except Exception as e:
            print(
                f"retry_execution failed once and will retry. This may be an error or a flaky test. Error: {e}"
            )
            traceback.print_exc()
            retries_left -= 1
        else:
            return

    # Final attempt runs outside any handler so that its failure propagates
    # with a clean traceback instead of a chain of the earlier attempts.
    fn()

View File

@@ -3,9 +3,10 @@ import unittest
from sglang import Anthropic, set_default_backend from sglang import Anthropic, set_default_backend
from sglang.test.test_programs import test_mt_bench, test_stream from sglang.test.test_programs import test_mt_bench, test_stream
from sglang.test.test_utils import CustomTestCase
class TestAnthropicBackend(unittest.TestCase): class TestAnthropicBackend(CustomTestCase):
backend = None backend = None
@classmethod @classmethod

View File

@@ -1,10 +1,10 @@
import unittest import unittest
import sglang as sgl import sglang as sgl
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
class TestBind(unittest.TestCase): class TestBind(CustomTestCase):
backend = None backend = None
@classmethod @classmethod

View File

@@ -7,6 +7,7 @@ from sglang.lang.choices import (
token_length_normalized, token_length_normalized,
unconditional_likelihood_normalized, unconditional_likelihood_normalized,
) )
from sglang.test.test_utils import CustomTestCase
MOCK_CHOICES_INPUT_DATA = { MOCK_CHOICES_INPUT_DATA = {
"choices": [ "choices": [
@@ -51,7 +52,7 @@ MOCK_CHOICES_INPUT_DATA = {
} }
class TestChoices(unittest.TestCase): class TestChoices(CustomTestCase):
def test_token_length_normalized(self): def test_token_length_normalized(self):
"""Confirm 'antidisestablishmentarianism' is selected due to high confidences for """Confirm 'antidisestablishmentarianism' is selected due to high confidences for

View File

@@ -3,9 +3,10 @@ import unittest
from sglang import LiteLLM, set_default_backend from sglang import LiteLLM, set_default_backend
from sglang.test.test_programs import test_mt_bench, test_stream from sglang.test.test_programs import test_mt_bench, test_stream
from sglang.test.test_utils import CustomTestCase
class TestAnthropicBackend(unittest.TestCase): class TestAnthropicBackend(CustomTestCase):
chat_backend = None chat_backend = None
@classmethod @classmethod

View File

@@ -17,9 +17,10 @@ from sglang.test.test_programs import (
test_stream, test_stream,
test_tool_use, test_tool_use,
) )
from sglang.test.test_utils import CustomTestCase
class TestOpenAIBackend(unittest.TestCase): class TestOpenAIBackend(CustomTestCase):
instruct_backend = None instruct_backend = None
chat_backend = None chat_backend = None
chat_vision_backend = None chat_vision_backend = None

View File

@@ -22,10 +22,10 @@ from sglang.test.test_programs import (
test_stream, test_stream,
test_tool_use, test_tool_use,
) )
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
class TestSRTBackend(unittest.TestCase): class TestSRTBackend(CustomTestCase):
backend = None backend = None
@classmethod @classmethod

View File

@@ -3,9 +3,10 @@ import unittest
import sglang as sgl import sglang as sgl
from sglang.lang.backend.base_backend import BaseBackend from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template from sglang.lang.chat_template import get_chat_template
from sglang.test.test_utils import CustomTestCase
class TestTracing(unittest.TestCase): class TestTracing(CustomTestCase):
def test_few_shot_qa(self): def test_few_shot_qa(self):
@sgl.function @sgl.function
def few_shot_qa(s, question): def few_shot_qa(s, question):

View File

@@ -10,9 +10,10 @@ from sglang.test.test_programs import (
test_parallel_encoding, test_parallel_encoding,
test_stream, test_stream,
) )
from sglang.test.test_utils import CustomTestCase
class TestVertexAIBackend(unittest.TestCase): class TestVertexAIBackend(CustomTestCase):
backend = None backend = None
@classmethod @classmethod

View File

@@ -18,6 +18,7 @@ import unittest
import torch import torch
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import CustomTestCase
LORA_SETS = [ LORA_SETS = [
# { # {
@@ -70,7 +71,7 @@ What do you know about llamas?
# PROMPTS.append(sample[0]["content"][:2000]) # PROMPTS.append(sample[0]["content"][:2000])
class TestLoRA(unittest.TestCase): class TestLoRA(CustomTestCase):
def inference(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens): def inference(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens):
print("=================== testing inference =======================") print("=================== testing inference =======================")

View File

@@ -21,7 +21,7 @@ import torch
from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import calculate_rouge_l, is_in_ci from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci
CI_LORA_MODELS = [ CI_LORA_MODELS = [
LoRAModelCase( LoRAModelCase(
@@ -67,7 +67,7 @@ PROMPTS = [
] ]
class TestLoRABackend(unittest.TestCase): class TestLoRABackend(CustomTestCase):
def run_backend( def run_backend(
self, self,
prompt: str, prompt: str,

View File

@@ -21,7 +21,7 @@ import torch
from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import calculate_rouge_l, is_in_ci from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci
CI_LORA_MODELS = [ CI_LORA_MODELS = [
LoRAModelCase( LoRAModelCase(
@@ -69,7 +69,7 @@ PROMPTS = [
BACKEND = "triton" BACKEND = "triton"
class TestLoRATP(unittest.TestCase): class TestLoRATP(CustomTestCase):
def run_tp( def run_tp(
self, self,
prompt: str, prompt: str,

View File

@@ -19,7 +19,7 @@ from typing import List
import torch import torch
from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
from sglang.test.test_utils import is_in_ci from sglang.test.test_utils import CustomTestCase, is_in_ci
MULTI_LORA_MODELS = [ MULTI_LORA_MODELS = [
LoRAModelCase( LoRAModelCase(
@@ -51,7 +51,7 @@ PROMPTS = [
] ]
class TestMultiLoRABackend(unittest.TestCase): class TestMultiLoRABackend(CustomTestCase):
def run_backend_batch( def run_backend_batch(
self, self,
prompts: List[str], prompts: List[str],

View File

@@ -20,7 +20,7 @@ import torch
from transformers import AutoConfig, AutoTokenizer from transformers import AutoConfig, AutoTokenizer
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
from sglang.test.test_utils import get_similarities, is_in_ci from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci
MODELS = [ MODELS = [
("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5), ("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
@@ -31,7 +31,7 @@ MODELS = [
TORCH_DTYPES = [torch.float16] TORCH_DTYPES = [torch.float16]
class TestEmbeddingModels(unittest.TestCase): class TestEmbeddingModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):

View File

@@ -33,7 +33,7 @@ from sglang.test.runners import (
SRTRunner, SRTRunner,
check_close_model_outputs, check_close_model_outputs,
) )
from sglang.test.test_utils import is_in_ci from sglang.test.test_utils import CustomTestCase, is_in_ci
@dataclasses.dataclass @dataclasses.dataclass
@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [
TORCH_DTYPES = [torch.float16] TORCH_DTYPES = [torch.float16]
class TestGenerationModels(unittest.TestCase): class TestGenerationModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):

View File

@@ -19,7 +19,7 @@ import unittest
import torch import torch
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import get_similarities from sglang.test.test_utils import CustomTestCase, get_similarities
TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series." TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg" IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
@@ -31,7 +31,7 @@ MODELS = [
TORCH_DTYPES = [torch.float16] TORCH_DTYPES = [torch.float16]
class TestQmeQwenModels(unittest.TestCase): class TestQmeQwenModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
mp.set_start_method("spawn", force=True) mp.set_start_method("spawn", force=True)

View File

@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestGrok(unittest.TestCase): class TestGrok(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmzheng/grok-1" cls.model = "lmzheng/grok-1"

View File

@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestQwen2(unittest.TestCase): class TestQwen2(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "Qwen/Qwen2-7B-Instruct" cls.model = "Qwen/Qwen2-7B-Instruct"
@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.78) self.assertGreater(metrics["accuracy"], 0.78)
class TestQwen2FP8(unittest.TestCase): class TestQwen2FP8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8" cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"

View File

@@ -18,6 +18,7 @@ import unittest
import torch import torch
from sglang.test.runners import HFRunner, SRTRunner from sglang.test.runners import HFRunner, SRTRunner
from sglang.test.test_utils import CustomTestCase
MODELS = [ MODELS = [
("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2), ("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
@@ -41,7 +42,7 @@ CONVS = [
] ]
class TestRewardModels(unittest.TestCase): class TestRewardModels(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):

View File

@@ -5,10 +5,10 @@ from concurrent.futures import ThreadPoolExecutor
import requests import requests
from sglang.test.test_utils import run_and_check_memory_leak from sglang.test.test_utils import CustomTestCase, run_and_check_memory_leak
class TestAbort(unittest.TestCase): class TestAbort(CustomTestCase):
def workload_func(self, base_url, model): def workload_func(self, base_url, model):
def process_func(): def process_func():
def run_one(_): def run_one(_):

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST, DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestAWQ(unittest.TestCase): class TestAWQ(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST

View File

@@ -3,6 +3,7 @@ import unittest
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
CustomTestCase,
get_bool_env_var, get_bool_env_var,
is_in_ci, is_in_ci,
run_bench_one_batch, run_bench_one_batch,
@@ -10,7 +11,7 @@ from sglang.test.test_utils import (
) )
class TestBenchOneBatch(unittest.TestCase): class TestBenchOneBatch(CustomTestCase):
def test_bs1(self): def test_bs1(self):
output_throughput = run_bench_one_batch( output_throughput = run_bench_one_batch(
DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"] DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"]

View File

@@ -6,13 +6,14 @@ from sglang.test.test_utils import (
DEFAULT_FP8_MODEL_NAME_FOR_TEST, DEFAULT_FP8_MODEL_NAME_FOR_TEST,
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
run_bench_serving, run_bench_serving,
write_github_step_summary, write_github_step_summary,
) )
class TestBenchServing(unittest.TestCase): class TestBenchServing(CustomTestCase):
def test_offline_throughput_default(self): def test_offline_throughput_default(self):
res = run_bench_serving( res = run_bench_serving(

View File

@@ -5,6 +5,7 @@ import torch
from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.test.test_utils import CustomTestCase
# For test # For test
@@ -121,7 +122,7 @@ def torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
).sum(dim=1) ).sum(dim=1)
class TestW8A8BlockINT8FusedMoE(unittest.TestCase): class TestW8A8BlockINT8FusedMoE(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
M = [1, 33, 64, 222] M = [1, 33, 64, 222]
N = [128, 1024] N = [128, 1024]

View File

@@ -8,11 +8,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestCacheReport(unittest.TestCase): class TestCacheReport(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -4,10 +4,10 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p
import unittest import unittest
from sglang.test.test_utils import run_mmlu_test, run_mulit_request_test from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test
class TestChunkedPrefill(unittest.TestCase): class TestChunkedPrefill(CustomTestCase):
def test_chunked_prefill(self): def test_chunked_prefill(self):
run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False) run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)

View File

@@ -5,9 +5,10 @@ import numpy as np
import torch import torch
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
from sglang.test.test_utils import CustomTestCase
class TestCreateKvIndices(unittest.TestCase): class TestCreateKvIndices(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
if not torch.cuda.is_available(): if not torch.cuda.is_available():

View File

@@ -17,6 +17,7 @@ from sglang.srt.distributed.parallel_state import (
graph_capture, graph_capture,
initialize_model_parallel, initialize_model_parallel,
) )
from sglang.test.test_utils import CustomTestCase
def get_open_port() -> int: def get_open_port() -> int:
@@ -54,7 +55,7 @@ def multi_process_parallel(
ray.shutdown() ray.shutdown()
class TestCustomAllReduce(unittest.TestCase): class TestCustomAllReduce(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
random.seed(42) random.seed(42)

View File

@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDataParallelism(unittest.TestCase): class TestDataParallelism(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDoubleSparsity(unittest.TestCase): class TestDoubleSparsity(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDPAttentionDP2TP2(unittest.TestCase): class TestDPAttentionDP2TP2(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST

View File

@@ -24,6 +24,7 @@ from sglang.test.test_utils import (
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
run_logprob_check, run_logprob_check,
) )
@@ -33,7 +34,7 @@ prefill_tolerance = 5e-2
decode_tolerance: float = 5e-2 decode_tolerance: float = 5e-2
class TestEAGLEEngine(unittest.TestCase): class TestEAGLEEngine(CustomTestCase):
BASE_CONFIG = { BASE_CONFIG = {
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, "model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, "speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
@@ -179,7 +180,7 @@ class TestEAGLE3Engine(TestEAGLEEngine):
NUM_CONFIGS = 1 NUM_CONFIGS = 1
class TestEAGLEServer(unittest.TestCase): class TestEAGLEServer(CustomTestCase):
PROMPTS = [ PROMPTS = [
"[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]" "[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]"
'[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]', '[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]',

View File

@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
@@ -42,7 +43,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
) )
class TestEBNFConstrained(unittest.TestCase): class TestEBNFConstrained(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
setup_class(cls, "xgrammar", disable_overlap=False) setup_class(cls, "xgrammar", disable_overlap=False)

View File

@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestOpenAIServer(unittest.TestCase): class TestOpenAIServer(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "intfloat/e5-mistral-7b-instruct" cls.model = "intfloat/e5-mistral-7b-instruct"

View File

@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
write_github_step_summary, write_github_step_summary,
) )
class TestEvalAccuracyLarge(unittest.TestCase): class TestEvalAccuracyLarge(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEvalFP8Accuracy(unittest.TestCase): class TestEvalFP8Accuracy(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST cls.model = DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
@@ -44,7 +45,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
self.assertGreaterEqual(metrics["score"], 0.61) self.assertGreaterEqual(metrics["score"], 0.61)
class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase): class TestEvalFP8DynamicQuantAccuracy(CustomTestCase):
def _run_test(self, model, other_args, expected_score): def _run_test(self, model, other_args, expected_score):
base_url = DEFAULT_URL_FOR_TEST base_url = DEFAULT_URL_FOR_TEST
@@ -109,7 +110,7 @@ class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
) )
class TestEvalFP8ModelOptQuantAccuracy(unittest.TestCase): class TestEvalFP8ModelOptQuantAccuracy(CustomTestCase):
def _run_test(self, model, other_args, expected_score): def _run_test(self, model, other_args, expected_score):
base_url = DEFAULT_URL_FOR_TEST base_url = DEFAULT_URL_FOR_TEST

View File

@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestExpertDistribution(unittest.TestCase): class TestExpertDistribution(CustomTestCase):
def setUp(self): def setUp(self):
# Clean up any existing expert distribution files before each test # Clean up any existing expert distribution files before each test
for f in glob.glob("expert_distribution_*.csv"): for f in glob.glob("expert_distribution_*.csv"):

View File

@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestFimCompletion(unittest.TestCase): class TestFimCompletion(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "deepseek-ai/deepseek-coder-1.3b-base" cls.model = "deepseek-ai/deepseek-coder-1.3b-base"

View File

@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.fp8_kernel import (
per_token_group_quant_fp8, per_token_group_quant_fp8,
w8a8_block_fp8_matmul, w8a8_block_fp8_matmul,
) )
from sglang.test.test_utils import CustomTestCase
class TestFP8Base(unittest.TestCase): class TestFP8Base(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.M = 256 cls.M = 256

View File

@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN, DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestFp8KvcacheBase(unittest.TestCase): class TestFp8KvcacheBase(CustomTestCase):
model_config = None model_config = None
@classmethod @classmethod

View File

@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestOpenAIServerFunctionCalling(unittest.TestCase): class TestOpenAIServerFunctionCalling(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
# Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -7,9 +7,10 @@ from vllm.model_executor.layers.fused_moe import fused_moe as fused_moe_vllm
from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.test.test_utils import CustomTestCase
class TestFusedMOE(unittest.TestCase): class TestFusedMOE(CustomTestCase):
NUM_EXPERTS = [8, 64] NUM_EXPERTS = [8, 64]
TOP_KS = [2, 6] TOP_KS = [2, 6]

View File

@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
) )
@@ -26,7 +27,7 @@ def _process_return(ret):
return np.array(ret) return np.array(ret)
class TestGetWeightsByName(unittest.TestCase): class TestGetWeightsByName(CustomTestCase):
def init_hf_model(self, model_name, tie_word_embeddings): def init_hf_model(self, model_name, tie_word_embeddings):
self.hf_model = AutoModelForCausalLM.from_pretrained( self.hf_model = AutoModelForCausalLM.from_pretrained(

View File

@@ -3,9 +3,10 @@ import unittest
from huggingface_hub import hf_hub_download from huggingface_hub import hf_hub_download
import sglang as sgl import sglang as sgl
from sglang.test.test_utils import CustomTestCase
class TestGGUF(unittest.TestCase): class TestGGUF(CustomTestCase):
def test_models(self): def test_models(self):
prompt = "Today is a sunny day and I like" prompt = "Today is a sunny day and I like"
sampling_params = {"temperature": 0, "max_new_tokens": 8} sampling_params = {"temperature": 0, "max_new_tokens": 8}

View File

@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
@@ -102,7 +103,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
# GPTQ with Dynamic Per/Module Quantization Control # GPTQ with Dynamic Per/Module Quantization Control
# Leverages GPTQModel (pypi) to produce the `dynamic` models # Leverages GPTQModel (pypi) to produce the `dynamic` models
# Test GPTQ fallback kernel that is not Marlin # Test GPTQ fallback kernel that is not Marlin
class TestGPTQModelDynamic(unittest.TestCase): class TestGPTQModelDynamic(CustomTestCase):
MODEL_PATH = ( MODEL_PATH = (
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse" "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
) )
@@ -157,7 +158,7 @@ class TestGPTQModelDynamic(unittest.TestCase):
# GPTQ with Dynamic Per/Module Quantization Control # GPTQ with Dynamic Per/Module Quantization Control
# Leverages GPTQModel (pypi) to produce the `dynamic` models # Leverages GPTQModel (pypi) to produce the `dynamic` models
# Test Marlin kernel # Test Marlin kernel
class TestGPTQModelDynamicWithMarlin(unittest.TestCase): class TestGPTQModelDynamicWithMarlin(CustomTestCase):
MODEL_PATH = ( MODEL_PATH = (
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue" "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
) )

View File

@@ -3,11 +3,12 @@ import unittest
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestHealthCheck(unittest.TestCase): class TestHealthCheck(CustomTestCase):
def test_health_check(self): def test_health_check(self):
"""Test that metrics endpoint returns data when enabled""" """Test that metrics endpoint returns data when enabled"""
with self.assertRaises(TimeoutError): with self.assertRaises(TimeoutError):

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestPageSize(unittest.TestCase): class TestPageSize(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestHierarchicalMLA(unittest.TestCase): class TestHierarchicalMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST

View File

@@ -4,10 +4,10 @@ import torch
from transformers import AutoModelForCausalLM, AutoTokenizer from transformers import AutoModelForCausalLM, AutoTokenizer
import sglang as sgl import sglang as sgl
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
class TestHiddenState(unittest.TestCase): class TestHiddenState(CustomTestCase):
def test_return_hidden_states(self): def test_return_hidden_states(self):
prompts = ["Today is", "Today is a sunny day and I like"] prompts = ["Today is", "Today is a sunny day and I like"]
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -11,11 +11,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestInputEmbeds(unittest.TestCase): class TestInputEmbeds(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -6,6 +6,7 @@ import torch
from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8 from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8
from sglang.test.test_utils import CustomTestCase
def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16): def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):
@@ -71,7 +72,7 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk):
).sum(dim=1) ).sum(dim=1)
class TestW8A8Int8FusedMoE(unittest.TestCase): class TestW8A8Int8FusedMoE(CustomTestCase):
DTYPES = [torch.half, torch.bfloat16] DTYPES = [torch.half, torch.bfloat16]
M = [1, 33] M = [1, 33]
N = [128, 1024] N = [128, 1024]

View File

@@ -16,6 +16,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
@@ -50,7 +51,7 @@ def setup_class(cls, backend: str):
) )
class TestJSONConstrainedOutlinesBackend(unittest.TestCase): class TestJSONConstrainedOutlinesBackend(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
setup_class(cls, backend="outlines") setup_class(cls, backend="outlines")

View File

@@ -17,11 +17,12 @@ from sglang.test.test_utils import (
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
STDERR_FILENAME, STDERR_FILENAME,
STDOUT_FILENAME, STDOUT_FILENAME,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestLargeMaxNewTokens(unittest.TestCase): class TestLargeMaxNewTokens(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -7,6 +7,7 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
@@ -18,7 +19,7 @@ The story should span multiple events, challenges, and character developments ov
""" """
class TestMatchedStop(unittest.TestCase): class TestMatchedStop(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEnableMetrics(unittest.TestCase): class TestEnableMetrics(CustomTestCase):
def test_metrics_enabled(self): def test_metrics_enabled(self):
"""Test that metrics endpoint returns data when enabled""" """Test that metrics endpoint returns data when enabled"""
process = popen_launch_server( process = popen_launch_server(

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLA(unittest.TestCase): class TestMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST

View File

@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLADeepseekV3(unittest.TestCase): class TestMLADeepseekV3(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
@@ -48,7 +49,7 @@ class TestMLADeepseekV3(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestDeepseekV3MTP(unittest.TestCase): class TestDeepseekV3MTP(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"

View File

@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestFlashinferMLA(unittest.TestCase): class TestFlashinferMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
@@ -55,7 +56,7 @@ class TestFlashinferMLA(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestFlashinferMLANoRagged(unittest.TestCase): class TestFlashinferMLANoRagged(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"
@@ -99,7 +100,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestFlashinferMLAMTP(unittest.TestCase): class TestFlashinferMLAMTP(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST, DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLA(unittest.TestCase): class TestMLA(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST

View File

@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestMLADeepseekV3ChannelInt8(unittest.TestCase): class TestMLADeepseekV3ChannelInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
@@ -48,7 +49,7 @@ class TestMLADeepseekV3ChannelInt8(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestDeepseekV3MTPChannelInt8(unittest.TestCase): class TestDeepseekV3MTPChannelInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
@@ -109,7 +110,7 @@ class TestDeepseekV3MTPChannelInt8(unittest.TestCase):
self.assertGreater(avg_spec_accept_length, 2.5) self.assertGreater(avg_spec_accept_length, 2.5)
class TestMLADeepseekV3BlockInt8(unittest.TestCase): class TestMLADeepseekV3BlockInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
@@ -144,7 +145,7 @@ class TestMLADeepseekV3BlockInt8(unittest.TestCase):
self.assertGreater(metrics["accuracy"], 0.62) self.assertGreater(metrics["accuracy"], 0.62)
class TestDeepseekV3MTPBlockInt8(unittest.TestCase): class TestDeepseekV3MTPBlockInt8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test" cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"

View File

@@ -8,11 +8,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDeepseekTP2(unittest.TestCase): class TestDeepseekTP2(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmsys/sglang-ci-dsv3-test" cls.model = "lmsys/sglang-ci-dsv3-test"

View File

@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.modelopt_quant import (
ModelOptFp8Config, ModelOptFp8Config,
ModelOptFp8KVCacheMethod, ModelOptFp8KVCacheMethod,
) )
from sglang.test.test_utils import CustomTestCase
class TestModelOptFp8KVCacheMethod(unittest.TestCase): class TestModelOptFp8KVCacheMethod(CustomTestCase):
def test_kv_cache_method_initialization(self): def test_kv_cache_method_initialization(self):
"""Test that ModelOptFp8KVCacheMethod can be instantiated and """Test that ModelOptFp8KVCacheMethod can be instantiated and
inherits from BaseKVCacheMethod.""" inherits from BaseKVCacheMethod."""

View File

@@ -5,9 +5,10 @@ import unittest
from unittest import mock from unittest import mock
from sglang.srt.utils import prepare_model_and_tokenizer from sglang.srt.utils import prepare_model_and_tokenizer
from sglang.test.test_utils import CustomTestCase
class TestDownloadFromModelScope(unittest.TestCase): class TestDownloadFromModelScope(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestDeepEPMoE(unittest.TestCase): class TestDeepEPMoE(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST, DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEpMoE(unittest.TestCase): class TestEpMoE(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
@@ -59,7 +60,7 @@ class TestEpMoE(unittest.TestCase):
self.assertGreater(metrics["score"], 0.8) self.assertGreater(metrics["score"], 0.8)
class TestEpMoEFP8(unittest.TestCase): class TestEpMoEFP8(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST

View File

@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
write_github_step_summary, write_github_step_summary,
) )
class TestMoEEvalAccuracyLarge(unittest.TestCase): class TestMoEEvalAccuracyLarge(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST

View File

@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
write_github_step_summary, write_github_step_summary,
@@ -129,7 +130,7 @@ def check_model_scores(results):
raise AssertionError("\n".join(failed_models)) raise AssertionError("\n".join(failed_models))
class TestNightlyGsm8KEval(unittest.TestCase): class TestNightlyGsm8KEval(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model_groups = [ cls.model_groups = [

View File

@@ -14,11 +14,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
) )
class TestNightlyHumanEval(unittest.TestCase): class TestNightlyHumanEval(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
if is_in_ci(): if is_in_ci():

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestEvalAccuracyLarge(unittest.TestCase): class TestEvalAccuracyLarge(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -2,12 +2,13 @@ import unittest
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
CustomTestCase,
run_bench_serving, run_bench_serving,
run_mmlu_test, run_mmlu_test,
) )
class TestNoChunkedPrefill(unittest.TestCase): class TestNoChunkedPrefill(CustomTestCase):
def test_no_chunked_prefill(self): def test_no_chunked_prefill(self):
run_mmlu_test( run_mmlu_test(

View File

@@ -6,10 +6,10 @@ python3 test_overlap_schedule.py
import unittest import unittest
from sglang.test.test_utils import run_mmlu_test from sglang.test.test_utils import CustomTestCase, run_mmlu_test
class TestOverlapSchedule(unittest.TestCase): class TestOverlapSchedule(CustomTestCase):
def test_no_radix_attention_chunked_prefill(self): def test_no_radix_attention_chunked_prefill(self):
run_mmlu_test( run_mmlu_test(
disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True

View File

@@ -18,11 +18,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestOpenAIServer(unittest.TestCase): class TestOpenAIServer(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -541,7 +542,7 @@ The SmartHome Mini is a compact smart home assistant available in black or white
# EBNF Test Class: TestOpenAIServerEBNF # EBNF Test Class: TestOpenAIServerEBNF
# Launches the server with xgrammar, has only EBNF tests # Launches the server with xgrammar, has only EBNF tests
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
class TestOpenAIServerEBNF(unittest.TestCase): class TestOpenAIServerEBNF(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -624,7 +625,7 @@ class TestOpenAIServerEBNF(unittest.TestCase):
) )
class TestOpenAIEmbedding(unittest.TestCase): class TestOpenAIEmbedding(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST

View File

@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestPageSize(unittest.TestCase): class TestPageSize(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1" os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1"

View File

@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestPenalty(unittest.TestCase): class TestPenalty(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):

View File

@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestPyTorchSamplingBackend(unittest.TestCase): class TestPyTorchSamplingBackend(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -8,6 +8,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
kill_process_tree, kill_process_tree,
popen_launch_server, popen_launch_server,
) )
@@ -59,7 +60,7 @@ def run_test(base_url, nodes):
assert res.status_code == 200 assert res.status_code == 200
class TestRadixCacheFCFS(unittest.TestCase): class TestRadixCacheFCFS(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -20,11 +20,12 @@ from sglang.test.test_utils import (
DEFAULT_REASONING_MODEL_NAME_FOR_TEST, DEFAULT_REASONING_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestReasoningContentAPI(unittest.TestCase): class TestReasoningContentAPI(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
@@ -181,7 +182,7 @@ class TestReasoningContentAPI(unittest.TestCase):
assert len(response.choices[0].message.content) > 0 assert len(response.choices[0].message.content) > 0
class TestReasoningContentWithoutParser(unittest.TestCase): class TestReasoningContentWithoutParser(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST

View File

@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
@@ -41,7 +42,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
) )
class TestRegexConstrained(unittest.TestCase): class TestRegexConstrained(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
setup_class(cls, "xgrammar", disable_overlap=False) setup_class(cls, "xgrammar", disable_overlap=False)

View File

@@ -5,13 +5,13 @@ import torch
from transformers import AutoModelForCausalLM from transformers import AutoModelForCausalLM
import sglang as sgl import sglang as sgl
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
# (temporarily) set to true to observe memory usage in nvidia-smi more clearly # (temporarily) set to true to observe memory usage in nvidia-smi more clearly
_DEBUG_EXTRA = True _DEBUG_EXTRA = True
class TestReleaseMemoryOccupation(unittest.TestCase): class TestReleaseMemoryOccupation(CustomTestCase):
def test_release_and_resume_occupation(self): def test_release_and_resume_occupation(self):
prompt = "Today is a sunny day and I like" prompt = "Today is a sunny day and I like"
sampling_params = {"temperature": 0, "max_new_tokens": 8} sampling_params = {"temperature": 0, "max_new_tokens": 8}

View File

@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestRequestLengthValidation(unittest.TestCase): class TestRequestLengthValidation(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.base_url = DEFAULT_URL_FOR_TEST cls.base_url = DEFAULT_URL_FOR_TEST

View File

@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestRetractDecode(unittest.TestCase): class TestRetractDecode(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
os.environ["SGLANG_TEST_RETRACT"] = "1" os.environ["SGLANG_TEST_RETRACT"] = "1"
@@ -40,7 +41,7 @@ class TestRetractDecode(unittest.TestCase):
self.assertGreaterEqual(metrics["score"], 0.65) self.assertGreaterEqual(metrics["score"], 0.65)
class TestRetractDecodeChunkCache(unittest.TestCase): class TestRetractDecodeChunkCache(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
os.environ["SGLANG_TEST_RETRACT"] = "1" os.environ["SGLANG_TEST_RETRACT"] = "1"

View File

@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestSageMakerServer(unittest.TestCase): class TestSageMakerServer(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -8,9 +8,10 @@ from sglang.srt.managers.schedule_policy import (
) )
from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
from sglang.srt.sampling.sampling_params import SamplingParams from sglang.srt.sampling.sampling_params import SamplingParams
from sglang.test.test_utils import CustomTestCase
class TestSchedulePolicy(unittest.TestCase): class TestSchedulePolicy(CustomTestCase):
def setUp(self): def setUp(self):
self.tree_cache = RadixCache(None, None, False) self.tree_cache = RadixCache(None, None, False)

View File

@@ -2,9 +2,10 @@ import json
import unittest import unittest
from sglang.srt.server_args import prepare_server_args from sglang.srt.server_args import prepare_server_args
from sglang.test.test_utils import CustomTestCase
class TestPrepareServerArgs(unittest.TestCase): class TestPrepareServerArgs(CustomTestCase):
def test_prepare_server_args(self): def test_prepare_server_args(self):
server_args = prepare_server_args( server_args = prepare_server_args(
[ [

View File

@@ -19,6 +19,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
@@ -27,7 +28,7 @@ def remove_prefix(text: str, prefix: str) -> str:
return text[len(prefix) :] if text.startswith(prefix) else text return text[len(prefix) :] if text.startswith(prefix) else text
class TestSessionControl(unittest.TestCase): class TestSessionControl(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
@@ -560,7 +561,7 @@ class TestSessionControl(unittest.TestCase):
) )
class TestSessionControlVision(unittest.TestCase): class TestSessionControlVision(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmms-lab/llava-onevision-qwen2-7b-ov" cls.model = "lmms-lab/llava-onevision-qwen2-7b-ov"

View File

@@ -19,11 +19,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_VLM_MODEL_NAME, DEFAULT_SMALL_VLM_MODEL_NAME,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestSkipTokenizerInit(unittest.TestCase): class TestSkipTokenizerInit(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -20,12 +20,13 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
run_logprob_check, run_logprob_check,
) )
class TestSRTEndpoint(unittest.TestCase): class TestSRTEndpoint(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST

View File

@@ -18,10 +18,11 @@ from sglang.test.few_shot_gsm8k_engine import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
CustomTestCase,
) )
class TestSRTEngine(unittest.TestCase): class TestSRTEngine(CustomTestCase):
def test_1_engine_runtime_consistency(self): def test_1_engine_runtime_consistency(self):
prompt = "Today is a sunny day and I like" prompt = "Today is a sunny day and I like"

View File

@@ -1,10 +1,10 @@
import unittest import unittest
import sglang as sgl import sglang as sgl
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
class TestSRTEngineWithQuantArgs(unittest.TestCase): class TestSRTEngineWithQuantArgs(CustomTestCase):
def test_1_quantization_args(self): def test_1_quantization_args(self):

View File

@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestTorchCompile(unittest.TestCase): class TestTorchCompile(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST

View File

@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST, DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server, popen_launch_server,
) )
class TestTorchCompileMoe(unittest.TestCase): class TestTorchCompileMoe(CustomTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST

View File

@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci, is_in_ci,
popen_launch_server, popen_launch_server,
run_bench_one_batch, run_bench_one_batch,
) )
class TestTorchNativeAttnBackend(unittest.TestCase): class TestTorchNativeAttnBackend(CustomTestCase):
def test_latency(self): def test_latency(self):
output_throughput = run_bench_one_batch( output_throughput = run_bench_one_batch(
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,

View File

@@ -1,9 +1,9 @@
import unittest import unittest
from sglang.test.test_utils import is_in_ci, run_bench_one_batch from sglang.test.test_utils import CustomTestCase, is_in_ci, run_bench_one_batch
class TestTorchTP(unittest.TestCase): class TestTorchTP(CustomTestCase):
def test_torch_native_llama(self): def test_torch_native_llama(self):
output_throughput = run_bench_one_batch( output_throughput = run_bench_one_batch(
"meta-llama/Meta-Llama-3-8B", "meta-llama/Meta-Llama-3-8B",

Some files were not shown because too many files have changed in this diff Show More