[ci]use H20 to run disaggregation test (#11543)
This commit is contained in:
@@ -163,9 +163,7 @@ suites = {
|
||||
TestFile("lora/test_lora_llama4.py", 400),
|
||||
TestFile("test_deepseek_v3_basic.py", 275),
|
||||
TestFile("test_deepseek_v3_mtp.py", 275),
|
||||
TestFile("test_disaggregation_different_tp.py", 600),
|
||||
TestFile("test_disaggregation_hybrid_attention.py", 200),
|
||||
TestFile("test_disaggregation_pp.py", 140),
|
||||
],
|
||||
"per-commit-4-gpu-b200": [
|
||||
# TestFile("test_gpt_oss_4gpu.py", 600),
|
||||
@@ -182,6 +180,8 @@ suites = {
|
||||
TestFile("test_deepseek_v32_basic.py", 275),
|
||||
],
|
||||
"per-commit-8-gpu-h20": [
|
||||
TestFile("test_disaggregation_different_tp.py", 600),
|
||||
TestFile("test_disaggregation_pp.py", 140),
|
||||
TestFile("quant/test_w4a8_deepseek_v3.py", 371),
|
||||
],
|
||||
"vllm_dependency_test": [
|
||||
|
||||
@@ -9,6 +9,7 @@ from sglang.test.test_utils import (
|
||||
DEFAULT_MODEL_NAME_FOR_TEST_MLA,
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
popen_launch_pd_server,
|
||||
try_cached_model,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,7 +20,7 @@ class TestDisaggregationMooncakePrefillLargerTP(TestDisaggregationBase):
|
||||
# Temporarily disable JIT DeepGEMM
|
||||
envs.SGLANG_ENABLE_JIT_DEEPGEMM.set(False)
|
||||
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
|
||||
cls.model = try_cached_model(DEFAULT_MODEL_NAME_FOR_TEST_MLA)
|
||||
|
||||
# Non blocking start servers
|
||||
cls.start_prefill()
|
||||
@@ -90,7 +91,7 @@ class TestDisaggregationMooncakeDecodeLargerTP(TestDisaggregationBase):
|
||||
# Temporarily disable JIT DeepGEMM
|
||||
envs.SGLANG_ENABLE_JIT_DEEPGEMM.set(False)
|
||||
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
|
||||
cls.model = try_cached_model(DEFAULT_MODEL_NAME_FOR_TEST_MLA)
|
||||
|
||||
# Non blocking start servers
|
||||
cls.start_prefill()
|
||||
@@ -161,7 +162,7 @@ class TestDisaggregationMooncakeMHAPrefillLargerTP(TestDisaggregationBase):
|
||||
# Temporarily disable JIT DeepGEMM
|
||||
envs.SGLANG_ENABLE_JIT_DEEPGEMM.set(False)
|
||||
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.model = try_cached_model(DEFAULT_MODEL_NAME_FOR_TEST)
|
||||
|
||||
# Non blocking start servers
|
||||
cls.start_prefill()
|
||||
@@ -232,7 +233,7 @@ class TestDisaggregationMooncakeMHADecodeLargerTP(TestDisaggregationBase):
|
||||
# Temporarily disable JIT DeepGEMM
|
||||
envs.SGLANG_ENABLE_JIT_DEEPGEMM.set(False)
|
||||
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.model = try_cached_model(DEFAULT_MODEL_NAME_FOR_TEST)
|
||||
|
||||
# Non blocking start servers
|
||||
cls.start_prefill()
|
||||
|
||||
@@ -8,6 +8,7 @@ from sglang.test.test_utils import (
|
||||
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
popen_launch_pd_server,
|
||||
try_cached_model,
|
||||
)
|
||||
|
||||
|
||||
@@ -15,7 +16,7 @@ class TestDisaggregationPPAccuracy(TestDisaggregationBase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
super().setUpClass()
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.model = try_cached_model(DEFAULT_MODEL_NAME_FOR_TEST)
|
||||
|
||||
# Non blocking start servers
|
||||
cls.start_prefill()
|
||||
|
||||
Reference in New Issue
Block a user