Add retry for flaky tests in CI (#4755)
This commit is contained in:
2
.github/workflows/execute-notebook.yml
vendored
2
.github/workflows/execute-notebook.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
|||||||
pip install -r docs/requirements.txt
|
pip install -r docs/requirements.txt
|
||||||
apt-get update
|
apt-get update
|
||||||
apt-get install -y pandoc
|
apt-get install -y pandoc
|
||||||
apt-get update && apt-get install -y parallel
|
apt-get update && apt-get install -y parallel retry
|
||||||
|
|
||||||
- name: Setup Jupyter Kernel
|
- name: Setup Jupyter Kernel
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -23,7 +23,8 @@ compile:
|
|||||||
parallel -0 -j3 --halt soon,fail=1 ' \
|
parallel -0 -j3 --halt soon,fail=1 ' \
|
||||||
NB_NAME=$$(basename {}); \
|
NB_NAME=$$(basename {}); \
|
||||||
START_TIME=$$(date +%s); \
|
START_TIME=$$(date +%s); \
|
||||||
jupyter nbconvert --to notebook --execute --inplace "{}" \
|
retry --delay=0 --times=3 -- \
|
||||||
|
jupyter nbconvert --to notebook --execute --inplace "{}" \
|
||||||
--ExecutePreprocessor.timeout=600 \
|
--ExecutePreprocessor.timeout=600 \
|
||||||
--ExecutePreprocessor.kernel_name=python3; \
|
--ExecutePreprocessor.kernel_name=python3; \
|
||||||
RET_CODE=$$?; \
|
RET_CODE=$$?; \
|
||||||
|
|||||||
@@ -4,9 +4,10 @@ import unittest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.layers.activation import GeluAndMul
|
from sglang.srt.layers.activation import GeluAndMul
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestGeluAndMul(unittest.TestCase):
|
class TestGeluAndMul(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16]
|
DTYPES = [torch.half, torch.bfloat16]
|
||||||
NUM_TOKENS = [7, 83, 2048]
|
NUM_TOKENS = [7, 83, 2048]
|
||||||
D = [512, 4096, 5120, 13824]
|
D = [512, 4096, 5120, 13824]
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ from sglang.srt.layers.quantization.fp8_kernel import (
|
|||||||
static_quant_fp8,
|
static_quant_fp8,
|
||||||
w8a8_block_fp8_matmul,
|
w8a8_block_fp8_matmul,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
_is_cuda = torch.cuda.is_available() and torch.version.cuda
|
_is_cuda = torch.cuda.is_available() and torch.version.cuda
|
||||||
|
|
||||||
@@ -44,7 +45,7 @@ def native_per_token_group_quant_fp8(
|
|||||||
return x_q, x_s
|
return x_q, x_s
|
||||||
|
|
||||||
|
|
||||||
class TestPerTokenGroupQuantFP8(unittest.TestCase):
|
class TestPerTokenGroupQuantFP8(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16, torch.float32]
|
DTYPES = [torch.half, torch.bfloat16, torch.float32]
|
||||||
NUM_TOKENS = [7, 83, 2048]
|
NUM_TOKENS = [7, 83, 2048]
|
||||||
D = [512, 4096, 5120, 13824]
|
D = [512, 4096, 5120, 13824]
|
||||||
@@ -111,7 +112,7 @@ def native_static_quant_fp8(x, x_s, dtype=torch.float8_e4m3fn):
|
|||||||
return x_q, x_s
|
return x_q, x_s
|
||||||
|
|
||||||
|
|
||||||
class TestStaticQuantFP8(unittest.TestCase):
|
class TestStaticQuantFP8(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16, torch.float32]
|
DTYPES = [torch.half, torch.bfloat16, torch.float32]
|
||||||
NUM_TOKENS = [7, 83, 2048]
|
NUM_TOKENS = [7, 83, 2048]
|
||||||
D = [512, 4096, 5120, 13824]
|
D = [512, 4096, 5120, 13824]
|
||||||
@@ -210,7 +211,7 @@ def native_w8a8_block_fp8_matmul(A, B, As, Bs, block_size, output_dtype=torch.fl
|
|||||||
return C
|
return C
|
||||||
|
|
||||||
|
|
||||||
class TestW8A8BlockFP8Matmul(unittest.TestCase):
|
class TestW8A8BlockFP8Matmul(CustomTestCase):
|
||||||
|
|
||||||
if not _is_cuda:
|
if not _is_cuda:
|
||||||
OUT_DTYPES = [torch.float32, torch.half, torch.bfloat16]
|
OUT_DTYPES = [torch.float32, torch.half, torch.bfloat16]
|
||||||
@@ -331,7 +332,7 @@ def torch_w8a8_block_fp8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
|
|||||||
).sum(dim=1)
|
).sum(dim=1)
|
||||||
|
|
||||||
|
|
||||||
class TestW8A8BlockFP8FusedMoE(unittest.TestCase):
|
class TestW8A8BlockFP8FusedMoE(CustomTestCase):
|
||||||
DTYPES = [torch.float32, torch.half, torch.bfloat16]
|
DTYPES = [torch.float32, torch.half, torch.bfloat16]
|
||||||
M = [1, 33, 64, 222, 1024 * 128]
|
M = [1, 33, 64, 222, 1024 * 128]
|
||||||
N = [128, 1024, 2048]
|
N = [128, 1024, 2048]
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from sglang.srt.layers.moe.ep_moe.kernels import (
|
|||||||
silu_and_mul_triton_kernel,
|
silu_and_mul_triton_kernel,
|
||||||
)
|
)
|
||||||
from sglang.srt.layers.moe.topk import select_experts
|
from sglang.srt.layers.moe.topk import select_experts
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
# For test
|
# For test
|
||||||
@@ -232,7 +233,7 @@ def block_dequant(
|
|||||||
return x_dq_block
|
return x_dq_block
|
||||||
|
|
||||||
|
|
||||||
class TestW8A8BlockFP8EPMoE(unittest.TestCase):
|
class TestW8A8BlockFP8EPMoE(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16]
|
DTYPES = [torch.half, torch.bfloat16]
|
||||||
M = [1, 222, 1024, 2048]
|
M = [1, 222, 1024, 2048]
|
||||||
N = [128, 1024, 2048]
|
N = [128, 1024, 2048]
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ import unittest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.utils import DynamicGradMode
|
from sglang.srt.utils import DynamicGradMode
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestDynamicGradMode(unittest.TestCase):
|
class TestDynamicGradMode(CustomTestCase):
|
||||||
def test_inference(self):
|
def test_inference(self):
|
||||||
# Test inference_mode
|
# Test inference_mode
|
||||||
DynamicGradMode.set_inference_mode(True)
|
DynamicGradMode.set_inference_mode(True)
|
||||||
|
|||||||
@@ -4,9 +4,10 @@ import unittest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm
|
from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestRMSNorm(unittest.TestCase):
|
class TestRMSNorm(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16]
|
DTYPES = [torch.half, torch.bfloat16]
|
||||||
NUM_TOKENS = [7, 83, 4096]
|
NUM_TOKENS = [7, 83, 4096]
|
||||||
HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199]
|
HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199]
|
||||||
@@ -56,7 +57,7 @@ class TestRMSNorm(unittest.TestCase):
|
|||||||
self._run_rms_norm_test(*params)
|
self._run_rms_norm_test(*params)
|
||||||
|
|
||||||
|
|
||||||
class TestGemmaRMSNorm(unittest.TestCase):
|
class TestGemmaRMSNorm(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16]
|
DTYPES = [torch.half, torch.bfloat16]
|
||||||
NUM_TOKENS = [7, 83, 4096]
|
NUM_TOKENS = [7, 83, 4096]
|
||||||
HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199]
|
HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199]
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import random
|
|||||||
import subprocess
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
import unittest
|
import unittest
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -998,3 +999,30 @@ def run_logprob_check(self: unittest.TestCase, arg: Tuple):
|
|||||||
rank += 1
|
rank += 1
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
class CustomTestCase(unittest.TestCase):
    """``unittest.TestCase`` variant whose test methods are retried on failure.

    The retry budget comes from ``_get_max_retry()`` and the retry loop itself
    lives in ``_retry_execution``.
    """

    def _callTestMethod(self, method):
        """Run ``method`` through the retry helper instead of calling it once."""

        def _invoke_once():
            # Zero-argument ``super()`` is unavailable in a nested function
            # without parameters, so the two-argument form is spelled out.
            super(CustomTestCase, self)._callTestMethod(method)

        retry_budget = _get_max_retry()
        _retry_execution(_invoke_once, max_retry=retry_budget)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_max_retry():
    """Return how many times a failing test may be re-run.

    The ``SGLANG_TEST_MAX_RETRY`` environment variable wins when set;
    otherwise the default is 2 when ``is_in_ci()`` is true and 0 when it is not.
    """
    fallback = "2" if is_in_ci() else "0"
    configured = os.environ.get("SGLANG_TEST_MAX_RETRY", fallback)
    return int(configured)
|
||||||
|
|
||||||
|
|
||||||
|
def _retry_execution(fn, max_retry: int):
    """Call ``fn`` and re-run it after a failure, up to ``max_retry`` more times.

    Args:
        fn: Zero-argument callable to execute. Its return value is ignored.
        max_retry: Number of extra attempts allowed after a failed one. Zero
            or any negative value means "run once, never retry".

    Raises:
        unittest.SkipTest: Propagated immediately; a skip is not a failure
            and must not be retried or reported as one.
        Exception: Whatever ``fn`` raised on its final attempt.
    """
    # Clamp so a negative budget cannot produce an unbounded retry loop.
    remaining = max(max_retry, 0)

    while True:
        try:
            fn()
            return
        except unittest.SkipTest:
            # SkipTest subclasses Exception; let the test runner record the skip.
            raise
        except Exception as e:
            if remaining == 0:
                # Budget exhausted: surface the last failure to the caller.
                raise
            remaining -= 1
            print(
                f"retry_execution failed once and will retry. This may be an error or a flaky test. Error: {e}"
            )
            traceback.print_exc()
        # The next attempt starts outside the ``except`` block, so earlier
        # failures are not chained onto the final exception's traceback.
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ import unittest
|
|||||||
|
|
||||||
from sglang import Anthropic, set_default_backend
|
from sglang import Anthropic, set_default_backend
|
||||||
from sglang.test.test_programs import test_mt_bench, test_stream
|
from sglang.test.test_programs import test_mt_bench, test_stream
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestAnthropicBackend(unittest.TestCase):
|
class TestAnthropicBackend(CustomTestCase):
|
||||||
backend = None
|
backend = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST
|
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestBind(unittest.TestCase):
|
class TestBind(CustomTestCase):
|
||||||
backend = None
|
backend = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from sglang.lang.choices import (
|
|||||||
token_length_normalized,
|
token_length_normalized,
|
||||||
unconditional_likelihood_normalized,
|
unconditional_likelihood_normalized,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
MOCK_CHOICES_INPUT_DATA = {
|
MOCK_CHOICES_INPUT_DATA = {
|
||||||
"choices": [
|
"choices": [
|
||||||
@@ -51,7 +52,7 @@ MOCK_CHOICES_INPUT_DATA = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class TestChoices(unittest.TestCase):
|
class TestChoices(CustomTestCase):
|
||||||
|
|
||||||
def test_token_length_normalized(self):
|
def test_token_length_normalized(self):
|
||||||
"""Confirm 'antidisestablishmentarianism' is selected due to high confidences for
|
"""Confirm 'antidisestablishmentarianism' is selected due to high confidences for
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ import unittest
|
|||||||
|
|
||||||
from sglang import LiteLLM, set_default_backend
|
from sglang import LiteLLM, set_default_backend
|
||||||
from sglang.test.test_programs import test_mt_bench, test_stream
|
from sglang.test.test_programs import test_mt_bench, test_stream
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestAnthropicBackend(unittest.TestCase):
|
class TestAnthropicBackend(CustomTestCase):
|
||||||
chat_backend = None
|
chat_backend = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -17,9 +17,10 @@ from sglang.test.test_programs import (
|
|||||||
test_stream,
|
test_stream,
|
||||||
test_tool_use,
|
test_tool_use,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIBackend(unittest.TestCase):
|
class TestOpenAIBackend(CustomTestCase):
|
||||||
instruct_backend = None
|
instruct_backend = None
|
||||||
chat_backend = None
|
chat_backend = None
|
||||||
chat_vision_backend = None
|
chat_vision_backend = None
|
||||||
|
|||||||
@@ -22,10 +22,10 @@ from sglang.test.test_programs import (
|
|||||||
test_stream,
|
test_stream,
|
||||||
test_tool_use,
|
test_tool_use,
|
||||||
)
|
)
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST
|
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestSRTBackend(unittest.TestCase):
|
class TestSRTBackend(CustomTestCase):
|
||||||
backend = None
|
backend = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ import unittest
|
|||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.lang.backend.base_backend import BaseBackend
|
from sglang.lang.backend.base_backend import BaseBackend
|
||||||
from sglang.lang.chat_template import get_chat_template
|
from sglang.lang.chat_template import get_chat_template
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestTracing(unittest.TestCase):
|
class TestTracing(CustomTestCase):
|
||||||
def test_few_shot_qa(self):
|
def test_few_shot_qa(self):
|
||||||
@sgl.function
|
@sgl.function
|
||||||
def few_shot_qa(s, question):
|
def few_shot_qa(s, question):
|
||||||
|
|||||||
@@ -10,9 +10,10 @@ from sglang.test.test_programs import (
|
|||||||
test_parallel_encoding,
|
test_parallel_encoding,
|
||||||
test_stream,
|
test_stream,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestVertexAIBackend(unittest.TestCase):
|
class TestVertexAIBackend(CustomTestCase):
|
||||||
backend = None
|
backend = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import unittest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.test.runners import HFRunner, SRTRunner
|
from sglang.test.runners import HFRunner, SRTRunner
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
LORA_SETS = [
|
LORA_SETS = [
|
||||||
# {
|
# {
|
||||||
@@ -70,7 +71,7 @@ What do you know about llamas?
|
|||||||
# PROMPTS.append(sample[0]["content"][:2000])
|
# PROMPTS.append(sample[0]["content"][:2000])
|
||||||
|
|
||||||
|
|
||||||
class TestLoRA(unittest.TestCase):
|
class TestLoRA(CustomTestCase):
|
||||||
|
|
||||||
def inference(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens):
|
def inference(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens):
|
||||||
print("=================== testing inference =======================")
|
print("=================== testing inference =======================")
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import torch
|
|||||||
from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
|
from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
|
||||||
|
|
||||||
from sglang.test.runners import HFRunner, SRTRunner
|
from sglang.test.runners import HFRunner, SRTRunner
|
||||||
from sglang.test.test_utils import calculate_rouge_l, is_in_ci
|
from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci
|
||||||
|
|
||||||
CI_LORA_MODELS = [
|
CI_LORA_MODELS = [
|
||||||
LoRAModelCase(
|
LoRAModelCase(
|
||||||
@@ -67,7 +67,7 @@ PROMPTS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class TestLoRABackend(unittest.TestCase):
|
class TestLoRABackend(CustomTestCase):
|
||||||
def run_backend(
|
def run_backend(
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import torch
|
|||||||
from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
|
from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
|
||||||
|
|
||||||
from sglang.test.runners import HFRunner, SRTRunner
|
from sglang.test.runners import HFRunner, SRTRunner
|
||||||
from sglang.test.test_utils import calculate_rouge_l, is_in_ci
|
from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci
|
||||||
|
|
||||||
CI_LORA_MODELS = [
|
CI_LORA_MODELS = [
|
||||||
LoRAModelCase(
|
LoRAModelCase(
|
||||||
@@ -69,7 +69,7 @@ PROMPTS = [
|
|||||||
BACKEND = "triton"
|
BACKEND = "triton"
|
||||||
|
|
||||||
|
|
||||||
class TestLoRATP(unittest.TestCase):
|
class TestLoRATP(CustomTestCase):
|
||||||
def run_tp(
|
def run_tp(
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ from typing import List
|
|||||||
import torch
|
import torch
|
||||||
from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
|
from utils import BACKENDS, TORCH_DTYPES, LoRAAdaptor, LoRAModelCase
|
||||||
|
|
||||||
from sglang.test.test_utils import is_in_ci
|
from sglang.test.test_utils import CustomTestCase, is_in_ci
|
||||||
|
|
||||||
MULTI_LORA_MODELS = [
|
MULTI_LORA_MODELS = [
|
||||||
LoRAModelCase(
|
LoRAModelCase(
|
||||||
@@ -51,7 +51,7 @@ PROMPTS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class TestMultiLoRABackend(unittest.TestCase):
|
class TestMultiLoRABackend(CustomTestCase):
|
||||||
def run_backend_batch(
|
def run_backend_batch(
|
||||||
self,
|
self,
|
||||||
prompts: List[str],
|
prompts: List[str],
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import torch
|
|||||||
from transformers import AutoConfig, AutoTokenizer
|
from transformers import AutoConfig, AutoTokenizer
|
||||||
|
|
||||||
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
|
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
|
||||||
from sglang.test.test_utils import get_similarities, is_in_ci
|
from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
|
("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
|
||||||
@@ -31,7 +31,7 @@ MODELS = [
|
|||||||
TORCH_DTYPES = [torch.float16]
|
TORCH_DTYPES = [torch.float16]
|
||||||
|
|
||||||
|
|
||||||
class TestEmbeddingModels(unittest.TestCase):
|
class TestEmbeddingModels(CustomTestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ from sglang.test.runners import (
|
|||||||
SRTRunner,
|
SRTRunner,
|
||||||
check_close_model_outputs,
|
check_close_model_outputs,
|
||||||
)
|
)
|
||||||
from sglang.test.test_utils import is_in_ci
|
from sglang.test.test_utils import CustomTestCase, is_in_ci
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
@@ -71,7 +71,7 @@ ALL_OTHER_MODELS = [
|
|||||||
TORCH_DTYPES = [torch.float16]
|
TORCH_DTYPES = [torch.float16]
|
||||||
|
|
||||||
|
|
||||||
class TestGenerationModels(unittest.TestCase):
|
class TestGenerationModels(CustomTestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ import unittest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.test.runners import HFRunner, SRTRunner
|
from sglang.test.runners import HFRunner, SRTRunner
|
||||||
from sglang.test.test_utils import get_similarities
|
from sglang.test.test_utils import CustomTestCase, get_similarities
|
||||||
|
|
||||||
TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
|
TEXTS = "two Subway Series sandwiches with meats, cheese, lettuce, tomatoes, and onions on a black background, accompanied by the Subway Series logo, highlighting a new sandwich series."
|
||||||
IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
|
IMAGES = "https://huggingface.co/datasets/liuhaotian/llava-bench-in-the-wild/resolve/main/images/023.jpg"
|
||||||
@@ -31,7 +31,7 @@ MODELS = [
|
|||||||
TORCH_DTYPES = [torch.float16]
|
TORCH_DTYPES = [torch.float16]
|
||||||
|
|
||||||
|
|
||||||
class TestQmeQwenModels(unittest.TestCase):
|
class TestQmeQwenModels(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
mp.set_start_method("spawn", force=True)
|
mp.set_start_method("spawn", force=True)
|
||||||
|
|||||||
@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestGrok(unittest.TestCase):
|
class TestGrok(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmzheng/grok-1"
|
cls.model = "lmzheng/grok-1"
|
||||||
|
|||||||
@@ -6,11 +6,12 @@ from sglang.test.few_shot_gsm8k import run_eval
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestQwen2(unittest.TestCase):
|
class TestQwen2(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "Qwen/Qwen2-7B-Instruct"
|
cls.model = "Qwen/Qwen2-7B-Instruct"
|
||||||
@@ -41,7 +42,7 @@ class TestQwen2(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["accuracy"], 0.78)
|
self.assertGreater(metrics["accuracy"], 0.78)
|
||||||
|
|
||||||
|
|
||||||
class TestQwen2FP8(unittest.TestCase):
|
class TestQwen2FP8(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"
|
cls.model = "neuralmagic/Qwen2-7B-Instruct-FP8"
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import unittest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.test.runners import HFRunner, SRTRunner
|
from sglang.test.runners import HFRunner, SRTRunner
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
|
("LxzGordon/URM-LLaMa-3.1-8B", 1, 4e-2),
|
||||||
@@ -41,7 +42,7 @@ CONVS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class TestRewardModels(unittest.TestCase):
|
class TestRewardModels(CustomTestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
|
|||||||
@@ -5,10 +5,10 @@ from concurrent.futures import ThreadPoolExecutor
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.test.test_utils import run_and_check_memory_leak
|
from sglang.test.test_utils import CustomTestCase, run_and_check_memory_leak
|
||||||
|
|
||||||
|
|
||||||
class TestAbort(unittest.TestCase):
|
class TestAbort(CustomTestCase):
|
||||||
def workload_func(self, base_url, model):
|
def workload_func(self, base_url, model):
|
||||||
def process_func():
|
def process_func():
|
||||||
def run_one(_):
|
def run_one(_):
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST,
|
DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestAWQ(unittest.TestCase):
|
class TestAWQ(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import unittest
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
get_bool_env_var,
|
get_bool_env_var,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
run_bench_one_batch,
|
run_bench_one_batch,
|
||||||
@@ -10,7 +11,7 @@ from sglang.test.test_utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestBenchOneBatch(unittest.TestCase):
|
class TestBenchOneBatch(CustomTestCase):
|
||||||
def test_bs1(self):
|
def test_bs1(self):
|
||||||
output_throughput = run_bench_one_batch(
|
output_throughput = run_bench_one_batch(
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"]
|
DEFAULT_MODEL_NAME_FOR_TEST, ["--cuda-graph-max-bs", "2"]
|
||||||
|
|||||||
@@ -6,13 +6,14 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_FP8_MODEL_NAME_FOR_TEST,
|
DEFAULT_FP8_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
run_bench_serving,
|
run_bench_serving,
|
||||||
write_github_step_summary,
|
write_github_step_summary,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestBenchServing(unittest.TestCase):
|
class TestBenchServing(CustomTestCase):
|
||||||
|
|
||||||
def test_offline_throughput_default(self):
|
def test_offline_throughput_default(self):
|
||||||
res = run_bench_serving(
|
res = run_bench_serving(
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import torch
|
|||||||
|
|
||||||
from sglang.srt.layers.activation import SiluAndMul
|
from sglang.srt.layers.activation import SiluAndMul
|
||||||
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
|
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
# For test
|
# For test
|
||||||
@@ -121,7 +122,7 @@ def torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_shape):
|
|||||||
).sum(dim=1)
|
).sum(dim=1)
|
||||||
|
|
||||||
|
|
||||||
class TestW8A8BlockINT8FusedMoE(unittest.TestCase):
|
class TestW8A8BlockINT8FusedMoE(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16]
|
DTYPES = [torch.half, torch.bfloat16]
|
||||||
M = [1, 33, 64, 222]
|
M = [1, 33, 64, 222]
|
||||||
N = [128, 1024]
|
N = [128, 1024]
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ from sglang.srt.utils import kill_process_tree
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestCacheReport(unittest.TestCase):
|
class TestCacheReport(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -4,10 +4,10 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.test.test_utils import run_mmlu_test, run_mulit_request_test
|
from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test
|
||||||
|
|
||||||
|
|
||||||
class TestChunkedPrefill(unittest.TestCase):
|
class TestChunkedPrefill(CustomTestCase):
|
||||||
def test_chunked_prefill(self):
|
def test_chunked_prefill(self):
|
||||||
run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)
|
run_mmlu_test(disable_radix_cache=False, enable_mixed_chunk=False)
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,10 @@ import numpy as np
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
|
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestCreateKvIndices(unittest.TestCase):
|
class TestCreateKvIndices(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
if not torch.cuda.is_available():
|
if not torch.cuda.is_available():
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from sglang.srt.distributed.parallel_state import (
|
|||||||
graph_capture,
|
graph_capture,
|
||||||
initialize_model_parallel,
|
initialize_model_parallel,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
def get_open_port() -> int:
|
def get_open_port() -> int:
|
||||||
@@ -54,7 +55,7 @@ def multi_process_parallel(
|
|||||||
ray.shutdown()
|
ray.shutdown()
|
||||||
|
|
||||||
|
|
||||||
class TestCustomAllReduce(unittest.TestCase):
|
class TestCustomAllReduce(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
random.seed(42)
|
random.seed(42)
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestDataParallelism(unittest.TestCase):
|
class TestDataParallelism(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestDoubleSparsity(unittest.TestCase):
|
class TestDoubleSparsity(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestDPAttentionDP2TP2(unittest.TestCase):
|
class TestDPAttentionDP2TP2(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
run_logprob_check,
|
run_logprob_check,
|
||||||
)
|
)
|
||||||
@@ -33,7 +34,7 @@ prefill_tolerance = 5e-2
|
|||||||
decode_tolerance: float = 5e-2
|
decode_tolerance: float = 5e-2
|
||||||
|
|
||||||
|
|
||||||
class TestEAGLEEngine(unittest.TestCase):
|
class TestEAGLEEngine(CustomTestCase):
|
||||||
BASE_CONFIG = {
|
BASE_CONFIG = {
|
||||||
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||||
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||||
@@ -179,7 +180,7 @@ class TestEAGLE3Engine(TestEAGLEEngine):
|
|||||||
NUM_CONFIGS = 1
|
NUM_CONFIGS = 1
|
||||||
|
|
||||||
|
|
||||||
class TestEAGLEServer(unittest.TestCase):
|
class TestEAGLEServer(CustomTestCase):
|
||||||
PROMPTS = [
|
PROMPTS = [
|
||||||
"[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]"
|
"[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nToday is a sunny day and I like[/INST]"
|
||||||
'[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]',
|
'[INST] <<SYS>>\\nYou are a helpful assistant.\\n<</SYS>>\\nWhat are the mental triggers in Jeff Walker\'s Product Launch Formula and "Launch" book?[/INST]',
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -42,7 +43,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEBNFConstrained(unittest.TestCase):
|
class TestEBNFConstrained(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
setup_class(cls, "xgrammar", disable_overlap=False)
|
setup_class(cls, "xgrammar", disable_overlap=False)
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "intfloat/e5-mistral-7b-instruct"
|
cls.model = "intfloat/e5-mistral-7b-instruct"
|
||||||
|
|||||||
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
write_github_step_summary,
|
write_github_step_summary,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalAccuracyLarge(unittest.TestCase):
|
class TestEvalAccuracyLarge(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalFP8Accuracy(unittest.TestCase):
|
class TestEvalFP8Accuracy(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
|
cls.model = DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST
|
||||||
@@ -44,7 +45,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
|
|||||||
self.assertGreaterEqual(metrics["score"], 0.61)
|
self.assertGreaterEqual(metrics["score"], 0.61)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
|
class TestEvalFP8DynamicQuantAccuracy(CustomTestCase):
|
||||||
|
|
||||||
def _run_test(self, model, other_args, expected_score):
|
def _run_test(self, model, other_args, expected_score):
|
||||||
base_url = DEFAULT_URL_FOR_TEST
|
base_url = DEFAULT_URL_FOR_TEST
|
||||||
@@ -109,7 +110,7 @@ class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalFP8ModelOptQuantAccuracy(unittest.TestCase):
|
class TestEvalFP8ModelOptQuantAccuracy(CustomTestCase):
|
||||||
|
|
||||||
def _run_test(self, model, other_args, expected_score):
|
def _run_test(self, model, other_args, expected_score):
|
||||||
base_url = DEFAULT_URL_FOR_TEST
|
base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestExpertDistribution(unittest.TestCase):
|
class TestExpertDistribution(CustomTestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
# Clean up any existing expert distribution files before each test
|
# Clean up any existing expert distribution files before each test
|
||||||
for f in glob.glob("expert_distribution_*.csv"):
|
for f in glob.glob("expert_distribution_*.csv"):
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.srt.utils import kill_process_tree
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestFimCompletion(unittest.TestCase):
|
class TestFimCompletion(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "deepseek-ai/deepseek-coder-1.3b-base"
|
cls.model = "deepseek-ai/deepseek-coder-1.3b-base"
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.fp8_kernel import (
|
|||||||
per_token_group_quant_fp8,
|
per_token_group_quant_fp8,
|
||||||
w8a8_block_fp8_matmul,
|
w8a8_block_fp8_matmul,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestFP8Base(unittest.TestCase):
|
class TestFP8Base(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.M = 256
|
cls.M = 256
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestFp8KvcacheBase(unittest.TestCase):
|
class TestFp8KvcacheBase(CustomTestCase):
|
||||||
model_config = None
|
model_config = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServerFunctionCalling(unittest.TestCase):
|
class TestOpenAIServerFunctionCalling(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
# Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
# Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -7,9 +7,10 @@ from vllm.model_executor.layers.fused_moe import fused_moe as fused_moe_vllm
|
|||||||
|
|
||||||
from sglang.srt.layers.activation import SiluAndMul
|
from sglang.srt.layers.activation import SiluAndMul
|
||||||
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
|
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestFusedMOE(unittest.TestCase):
|
class TestFusedMOE(CustomTestCase):
|
||||||
NUM_EXPERTS = [8, 64]
|
NUM_EXPERTS = [8, 64]
|
||||||
TOP_KS = [2, 6]
|
TOP_KS = [2, 6]
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
@@ -26,7 +27,7 @@ def _process_return(ret):
|
|||||||
return np.array(ret)
|
return np.array(ret)
|
||||||
|
|
||||||
|
|
||||||
class TestGetWeightsByName(unittest.TestCase):
|
class TestGetWeightsByName(CustomTestCase):
|
||||||
|
|
||||||
def init_hf_model(self, model_name, tie_word_embeddings):
|
def init_hf_model(self, model_name, tie_word_embeddings):
|
||||||
self.hf_model = AutoModelForCausalLM.from_pretrained(
|
self.hf_model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ import unittest
|
|||||||
from huggingface_hub import hf_hub_download
|
from huggingface_hub import hf_hub_download
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestGGUF(unittest.TestCase):
|
class TestGGUF(CustomTestCase):
|
||||||
def test_models(self):
|
def test_models(self):
|
||||||
prompt = "Today is a sunny day and I like"
|
prompt = "Today is a sunny day and I like"
|
||||||
sampling_params = {"temperature": 0, "max_new_tokens": 8}
|
sampling_params = {"temperature": 0, "max_new_tokens": 8}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -102,7 +103,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
|
|||||||
# GPTQ with Dynamic Per/Module Quantization Control
|
# GPTQ with Dynamic Per/Module Quantization Control
|
||||||
# Leverages GPTQModel (pypi) to produce the `dynamic` models
|
# Leverages GPTQModel (pypi) to produce the `dynamic` models
|
||||||
# Test GPTQ fallback kernel that is not Marlin
|
# Test GPTQ fallback kernel that is not Marlin
|
||||||
class TestGPTQModelDynamic(unittest.TestCase):
|
class TestGPTQModelDynamic(CustomTestCase):
|
||||||
MODEL_PATH = (
|
MODEL_PATH = (
|
||||||
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
|
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
|
||||||
)
|
)
|
||||||
@@ -157,7 +158,7 @@ class TestGPTQModelDynamic(unittest.TestCase):
|
|||||||
# GPTQ with Dynamic Per/Module Quantization Control
|
# GPTQ with Dynamic Per/Module Quantization Control
|
||||||
# Leverages GPTQModel (pypi) to produce the `dynamic` models
|
# Leverages GPTQModel (pypi) to produce the `dynamic` models
|
||||||
# Test Marlin kernel
|
# Test Marlin kernel
|
||||||
class TestGPTQModelDynamicWithMarlin(unittest.TestCase):
|
class TestGPTQModelDynamicWithMarlin(CustomTestCase):
|
||||||
MODEL_PATH = (
|
MODEL_PATH = (
|
||||||
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
|
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -3,11 +3,12 @@ import unittest
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestHealthCheck(unittest.TestCase):
|
class TestHealthCheck(CustomTestCase):
|
||||||
def test_health_check(self):
|
def test_health_check(self):
|
||||||
"""Test that metrics endpoint returns data when enabled"""
|
"""Test that metrics endpoint returns data when enabled"""
|
||||||
with self.assertRaises(TimeoutError):
|
with self.assertRaises(TimeoutError):
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestPageSize(unittest.TestCase):
|
class TestPageSize(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestHierarchicalMLA(unittest.TestCase):
|
class TestHierarchicalMLA(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -4,10 +4,10 @@ import torch
|
|||||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestHiddenState(unittest.TestCase):
|
class TestHiddenState(CustomTestCase):
|
||||||
def test_return_hidden_states(self):
|
def test_return_hidden_states(self):
|
||||||
prompts = ["Today is", "Today is a sunny day and I like"]
|
prompts = ["Today is", "Today is a sunny day and I like"]
|
||||||
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestInputEmbeds(unittest.TestCase):
|
class TestInputEmbeds(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import torch
|
|||||||
from sglang.srt.layers.activation import SiluAndMul
|
from sglang.srt.layers.activation import SiluAndMul
|
||||||
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
|
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
|
||||||
from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8
|
from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):
|
def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):
|
||||||
@@ -71,7 +72,7 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk):
|
|||||||
).sum(dim=1)
|
).sum(dim=1)
|
||||||
|
|
||||||
|
|
||||||
class TestW8A8Int8FusedMoE(unittest.TestCase):
|
class TestW8A8Int8FusedMoE(CustomTestCase):
|
||||||
DTYPES = [torch.half, torch.bfloat16]
|
DTYPES = [torch.half, torch.bfloat16]
|
||||||
M = [1, 33]
|
M = [1, 33]
|
||||||
N = [128, 1024]
|
N = [128, 1024]
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -50,7 +51,7 @@ def setup_class(cls, backend: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestJSONConstrainedOutlinesBackend(unittest.TestCase):
|
class TestJSONConstrainedOutlinesBackend(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
setup_class(cls, backend="outlines")
|
setup_class(cls, backend="outlines")
|
||||||
|
|||||||
@@ -17,11 +17,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
STDERR_FILENAME,
|
STDERR_FILENAME,
|
||||||
STDOUT_FILENAME,
|
STDOUT_FILENAME,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestLargeMaxNewTokens(unittest.TestCase):
|
class TestLargeMaxNewTokens(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from sglang.srt.utils import kill_process_tree
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -18,7 +19,7 @@ The story should span multiple events, challenges, and character developments ov
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class TestMatchedStop(unittest.TestCase):
|
class TestMatchedStop(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEnableMetrics(unittest.TestCase):
|
class TestEnableMetrics(CustomTestCase):
|
||||||
def test_metrics_enabled(self):
|
def test_metrics_enabled(self):
|
||||||
"""Test that metrics endpoint returns data when enabled"""
|
"""Test that metrics endpoint returns data when enabled"""
|
||||||
process = popen_launch_server(
|
process = popen_launch_server(
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestMLA(unittest.TestCase):
|
class TestMLA(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestMLADeepseekV3(unittest.TestCase):
|
class TestMLADeepseekV3(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmsys/sglang-ci-dsv3-test"
|
cls.model = "lmsys/sglang-ci-dsv3-test"
|
||||||
@@ -48,7 +49,7 @@ class TestMLADeepseekV3(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["accuracy"], 0.62)
|
self.assertGreater(metrics["accuracy"], 0.62)
|
||||||
|
|
||||||
|
|
||||||
class TestDeepseekV3MTP(unittest.TestCase):
|
class TestDeepseekV3MTP(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmsys/sglang-ci-dsv3-test"
|
cls.model = "lmsys/sglang-ci-dsv3-test"
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestFlashinferMLA(unittest.TestCase):
|
class TestFlashinferMLA(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmsys/sglang-ci-dsv3-test"
|
cls.model = "lmsys/sglang-ci-dsv3-test"
|
||||||
@@ -55,7 +56,7 @@ class TestFlashinferMLA(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["accuracy"], 0.62)
|
self.assertGreater(metrics["accuracy"], 0.62)
|
||||||
|
|
||||||
|
|
||||||
class TestFlashinferMLANoRagged(unittest.TestCase):
|
class TestFlashinferMLANoRagged(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmsys/sglang-ci-dsv3-test"
|
cls.model = "lmsys/sglang-ci-dsv3-test"
|
||||||
@@ -99,7 +100,7 @@ class TestFlashinferMLANoRagged(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["accuracy"], 0.62)
|
self.assertGreater(metrics["accuracy"], 0.62)
|
||||||
|
|
||||||
|
|
||||||
class TestFlashinferMLAMTP(unittest.TestCase):
|
class TestFlashinferMLAMTP(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmsys/sglang-ci-dsv3-test"
|
cls.model = "lmsys/sglang-ci-dsv3-test"
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestMLA(unittest.TestCase):
|
class TestMLA(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestMLADeepseekV3ChannelInt8(unittest.TestCase):
|
class TestMLADeepseekV3ChannelInt8(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
|
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
|
||||||
@@ -48,7 +49,7 @@ class TestMLADeepseekV3ChannelInt8(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["accuracy"], 0.62)
|
self.assertGreater(metrics["accuracy"], 0.62)
|
||||||
|
|
||||||
|
|
||||||
class TestDeepseekV3MTPChannelInt8(unittest.TestCase):
|
class TestDeepseekV3MTPChannelInt8(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
|
cls.model = "sgl-project/sglang-ci-dsv3-channel-int8-test"
|
||||||
@@ -109,7 +110,7 @@ class TestDeepseekV3MTPChannelInt8(unittest.TestCase):
|
|||||||
self.assertGreater(avg_spec_accept_length, 2.5)
|
self.assertGreater(avg_spec_accept_length, 2.5)
|
||||||
|
|
||||||
|
|
||||||
class TestMLADeepseekV3BlockInt8(unittest.TestCase):
|
class TestMLADeepseekV3BlockInt8(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
|
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
|
||||||
@@ -144,7 +145,7 @@ class TestMLADeepseekV3BlockInt8(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["accuracy"], 0.62)
|
self.assertGreater(metrics["accuracy"], 0.62)
|
||||||
|
|
||||||
|
|
||||||
class TestDeepseekV3MTPBlockInt8(unittest.TestCase):
|
class TestDeepseekV3MTPBlockInt8(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
|
cls.model = "sgl-project/sglang-ci-dsv3-block-int8-test"
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestDeepseekTP2(unittest.TestCase):
|
class TestDeepseekTP2(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmsys/sglang-ci-dsv3-test"
|
cls.model = "lmsys/sglang-ci-dsv3-test"
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ from sglang.srt.layers.quantization.modelopt_quant import (
|
|||||||
ModelOptFp8Config,
|
ModelOptFp8Config,
|
||||||
ModelOptFp8KVCacheMethod,
|
ModelOptFp8KVCacheMethod,
|
||||||
)
|
)
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestModelOptFp8KVCacheMethod(unittest.TestCase):
|
class TestModelOptFp8KVCacheMethod(CustomTestCase):
|
||||||
def test_kv_cache_method_initialization(self):
|
def test_kv_cache_method_initialization(self):
|
||||||
"""Test that ModelOptFp8KVCacheMethod can be instantiated and
|
"""Test that ModelOptFp8KVCacheMethod can be instantiated and
|
||||||
inherits from BaseKVCacheMethod."""
|
inherits from BaseKVCacheMethod."""
|
||||||
|
|||||||
@@ -5,9 +5,10 @@ import unittest
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from sglang.srt.utils import prepare_model_and_tokenizer
|
from sglang.srt.utils import prepare_model_and_tokenizer
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestDownloadFromModelScope(unittest.TestCase):
|
class TestDownloadFromModelScope(CustomTestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestDeepEPMoE(unittest.TestCase):
|
class TestDeepEPMoE(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEpMoE(unittest.TestCase):
|
class TestEpMoE(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
||||||
@@ -59,7 +60,7 @@ class TestEpMoE(unittest.TestCase):
|
|||||||
self.assertGreater(metrics["score"], 0.8)
|
self.assertGreater(metrics["score"], 0.8)
|
||||||
|
|
||||||
|
|
||||||
class TestEpMoEFP8(unittest.TestCase):
|
class TestEpMoEFP8(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
write_github_step_summary,
|
write_github_step_summary,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestMoEEvalAccuracyLarge(unittest.TestCase):
|
class TestMoEEvalAccuracyLarge(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
write_github_step_summary,
|
write_github_step_summary,
|
||||||
@@ -129,7 +130,7 @@ def check_model_scores(results):
|
|||||||
raise AssertionError("\n".join(failed_models))
|
raise AssertionError("\n".join(failed_models))
|
||||||
|
|
||||||
|
|
||||||
class TestNightlyGsm8KEval(unittest.TestCase):
|
class TestNightlyGsm8KEval(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model_groups = [
|
cls.model_groups = [
|
||||||
|
|||||||
@@ -14,11 +14,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestNightlyHumanEval(unittest.TestCase):
|
class TestNightlyHumanEval(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
if is_in_ci():
|
if is_in_ci():
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestEvalAccuracyLarge(unittest.TestCase):
|
class TestEvalAccuracyLarge(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -2,12 +2,13 @@ import unittest
|
|||||||
|
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
run_bench_serving,
|
run_bench_serving,
|
||||||
run_mmlu_test,
|
run_mmlu_test,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestNoChunkedPrefill(unittest.TestCase):
|
class TestNoChunkedPrefill(CustomTestCase):
|
||||||
|
|
||||||
def test_no_chunked_prefill(self):
|
def test_no_chunked_prefill(self):
|
||||||
run_mmlu_test(
|
run_mmlu_test(
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ python3 test_overlap_schedule.py
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.test.test_utils import run_mmlu_test
|
from sglang.test.test_utils import CustomTestCase, run_mmlu_test
|
||||||
|
|
||||||
|
|
||||||
class TestOverlapSchedule(unittest.TestCase):
|
class TestOverlapSchedule(CustomTestCase):
|
||||||
def test_no_radix_attention_chunked_prefill(self):
|
def test_no_radix_attention_chunked_prefill(self):
|
||||||
run_mmlu_test(
|
run_mmlu_test(
|
||||||
disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True
|
disable_radix_cache=True, chunked_prefill_size=32, disable_overlap=True
|
||||||
|
|||||||
@@ -18,11 +18,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIServer(unittest.TestCase):
|
class TestOpenAIServer(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
@@ -541,7 +542,7 @@ The SmartHome Mini is a compact smart home assistant available in black or white
|
|||||||
# EBNF Test Class: TestOpenAIServerEBNF
|
# EBNF Test Class: TestOpenAIServerEBNF
|
||||||
# Launches the server with xgrammar, has only EBNF tests
|
# Launches the server with xgrammar, has only EBNF tests
|
||||||
# -------------------------------------------------------------------------
|
# -------------------------------------------------------------------------
|
||||||
class TestOpenAIServerEBNF(unittest.TestCase):
|
class TestOpenAIServerEBNF(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
@@ -624,7 +625,7 @@ class TestOpenAIServerEBNF(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIEmbedding(unittest.TestCase):
|
class TestOpenAIEmbedding(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestPageSize(unittest.TestCase):
|
class TestPageSize(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1"
|
os.environ["SGLANG_DEBUG_MEMORY_POOL"] = "1"
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestPenalty(unittest.TestCase):
|
class TestPenalty(CustomTestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestPyTorchSamplingBackend(unittest.TestCase):
|
class TestPyTorchSamplingBackend(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
kill_process_tree,
|
kill_process_tree,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
@@ -59,7 +60,7 @@ def run_test(base_url, nodes):
|
|||||||
assert res.status_code == 200
|
assert res.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
class TestRadixCacheFCFS(unittest.TestCase):
|
class TestRadixCacheFCFS(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -20,11 +20,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_REASONING_MODEL_NAME_FOR_TEST,
|
DEFAULT_REASONING_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestReasoningContentAPI(unittest.TestCase):
|
class TestReasoningContentAPI(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
|
||||||
@@ -181,7 +182,7 @@ class TestReasoningContentAPI(unittest.TestCase):
|
|||||||
assert len(response.choices[0].message.content) > 0
|
assert len(response.choices[0].message.content) > 0
|
||||||
|
|
||||||
|
|
||||||
class TestReasoningContentWithoutParser(unittest.TestCase):
|
class TestReasoningContentWithoutParser(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -41,7 +42,7 @@ def setup_class(cls, backend: str, disable_overlap: bool):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestRegexConstrained(unittest.TestCase):
|
class TestRegexConstrained(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
setup_class(cls, "xgrammar", disable_overlap=False)
|
setup_class(cls, "xgrammar", disable_overlap=False)
|
||||||
|
|||||||
@@ -5,13 +5,13 @@ import torch
|
|||||||
from transformers import AutoModelForCausalLM
|
from transformers import AutoModelForCausalLM
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
|
||||||
|
|
||||||
# (temporarily) set to true to observe memory usage in nvidia-smi more clearly
|
# (temporarily) set to true to observe memory usage in nvidia-smi more clearly
|
||||||
_DEBUG_EXTRA = True
|
_DEBUG_EXTRA = True
|
||||||
|
|
||||||
|
|
||||||
class TestReleaseMemoryOccupation(unittest.TestCase):
|
class TestReleaseMemoryOccupation(CustomTestCase):
|
||||||
def test_release_and_resume_occupation(self):
|
def test_release_and_resume_occupation(self):
|
||||||
prompt = "Today is a sunny day and I like"
|
prompt = "Today is a sunny day and I like"
|
||||||
sampling_params = {"temperature": 0, "max_new_tokens": 8}
|
sampling_params = {"temperature": 0, "max_new_tokens": 8}
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestRequestLengthValidation(unittest.TestCase):
|
class TestRequestLengthValidation(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestRetractDecode(unittest.TestCase):
|
class TestRetractDecode(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
os.environ["SGLANG_TEST_RETRACT"] = "1"
|
os.environ["SGLANG_TEST_RETRACT"] = "1"
|
||||||
@@ -40,7 +41,7 @@ class TestRetractDecode(unittest.TestCase):
|
|||||||
self.assertGreaterEqual(metrics["score"], 0.65)
|
self.assertGreaterEqual(metrics["score"], 0.65)
|
||||||
|
|
||||||
|
|
||||||
class TestRetractDecodeChunkCache(unittest.TestCase):
|
class TestRetractDecodeChunkCache(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
os.environ["SGLANG_TEST_RETRACT"] = "1"
|
os.environ["SGLANG_TEST_RETRACT"] = "1"
|
||||||
|
|||||||
@@ -13,11 +13,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSageMakerServer(unittest.TestCase):
|
class TestSageMakerServer(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -8,9 +8,10 @@ from sglang.srt.managers.schedule_policy import (
|
|||||||
)
|
)
|
||||||
from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
|
from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
|
||||||
from sglang.srt.sampling.sampling_params import SamplingParams
|
from sglang.srt.sampling.sampling_params import SamplingParams
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestSchedulePolicy(unittest.TestCase):
|
class TestSchedulePolicy(CustomTestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.tree_cache = RadixCache(None, None, False)
|
self.tree_cache = RadixCache(None, None, False)
|
||||||
|
|||||||
@@ -2,9 +2,10 @@ import json
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.srt.server_args import prepare_server_args
|
from sglang.srt.server_args import prepare_server_args
|
||||||
|
from sglang.test.test_utils import CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestPrepareServerArgs(unittest.TestCase):
|
class TestPrepareServerArgs(CustomTestCase):
|
||||||
def test_prepare_server_args(self):
|
def test_prepare_server_args(self):
|
||||||
server_args = prepare_server_args(
|
server_args = prepare_server_args(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -27,7 +28,7 @@ def remove_prefix(text: str, prefix: str) -> str:
|
|||||||
return text[len(prefix) :] if text.startswith(prefix) else text
|
return text[len(prefix) :] if text.startswith(prefix) else text
|
||||||
|
|
||||||
|
|
||||||
class TestSessionControl(unittest.TestCase):
|
class TestSessionControl(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
@@ -560,7 +561,7 @@ class TestSessionControl(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSessionControlVision(unittest.TestCase):
|
class TestSessionControlVision(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = "lmms-lab/llava-onevision-qwen2-7b-ov"
|
cls.model = "lmms-lab/llava-onevision-qwen2-7b-ov"
|
||||||
|
|||||||
@@ -19,11 +19,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_VLM_MODEL_NAME,
|
DEFAULT_SMALL_VLM_MODEL_NAME,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSkipTokenizerInit(unittest.TestCase):
|
class TestSkipTokenizerInit(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -20,12 +20,13 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
run_logprob_check,
|
run_logprob_check,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSRTEndpoint(unittest.TestCase):
|
class TestSRTEndpoint(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -18,10 +18,11 @@ from sglang.test.few_shot_gsm8k_engine import run_eval
|
|||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSRTEngine(unittest.TestCase):
|
class TestSRTEngine(CustomTestCase):
|
||||||
|
|
||||||
def test_1_engine_runtime_consistency(self):
|
def test_1_engine_runtime_consistency(self):
|
||||||
prompt = "Today is a sunny day and I like"
|
prompt = "Today is a sunny day and I like"
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
class TestSRTEngineWithQuantArgs(unittest.TestCase):
|
class TestSRTEngineWithQuantArgs(CustomTestCase):
|
||||||
|
|
||||||
def test_1_quantization_args(self):
|
def test_1_quantization_args(self):
|
||||||
|
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestTorchCompile(unittest.TestCase):
|
class TestTorchCompile(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestTorchCompileMoe(unittest.TestCase):
|
class TestTorchCompileMoe(CustomTestCase):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST
|
||||||
|
|||||||
@@ -12,13 +12,14 @@ from sglang.test.test_utils import (
|
|||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
CustomTestCase,
|
||||||
is_in_ci,
|
is_in_ci,
|
||||||
popen_launch_server,
|
popen_launch_server,
|
||||||
run_bench_one_batch,
|
run_bench_one_batch,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestTorchNativeAttnBackend(unittest.TestCase):
|
class TestTorchNativeAttnBackend(CustomTestCase):
|
||||||
def test_latency(self):
|
def test_latency(self):
|
||||||
output_throughput = run_bench_one_batch(
|
output_throughput = run_bench_one_batch(
|
||||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.test.test_utils import is_in_ci, run_bench_one_batch
|
from sglang.test.test_utils import CustomTestCase, is_in_ci, run_bench_one_batch
|
||||||
|
|
||||||
|
|
||||||
class TestTorchTP(unittest.TestCase):
|
class TestTorchTP(CustomTestCase):
|
||||||
def test_torch_native_llama(self):
|
def test_torch_native_llama(self):
|
||||||
output_throughput = run_bench_one_batch(
|
output_throughput = run_bench_one_batch(
|
||||||
"meta-llama/Meta-Llama-3-8B",
|
"meta-llama/Meta-Llama-3-8B",
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user