[8/N] MoE Refactor: deprecate EPMoE (#11211)

This commit is contained in:
Cheng Wan
2025-10-07 21:51:41 -07:00
committed by GitHub
parent 7c3f07dbcb
commit 3c06b673af
19 changed files with 526 additions and 1808 deletions

View File

@@ -12,7 +12,7 @@ from sglang.test.test_utils import (
)
class TestEpMoE(CustomTestCase):
class TestEp(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
@@ -34,18 +34,6 @@ class TestEpMoE(CustomTestCase):
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_mmlu(self):
args = SimpleNamespace(
base_url=self.base_url,
model=self.model,
eval_name="mmlu",
num_examples=64,
num_threads=32,
)
metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.5)
def test_mgsm_en(self):
args = SimpleNamespace(
base_url=self.base_url,
@@ -59,7 +47,7 @@ class TestEpMoE(CustomTestCase):
self.assertGreaterEqual(metrics["score"], 0.8)
class TestEpMoEFP8(CustomTestCase):
class TestEpDeepGEMM(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
@@ -76,6 +64,8 @@ class TestEpMoEFP8(CustomTestCase):
"2",
"--quantization",
"fp8",
"--moe-runner-backend",
"deep_gemm",
],
)
@@ -83,18 +73,6 @@ class TestEpMoEFP8(CustomTestCase):
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_mmlu(self):
args = SimpleNamespace(
base_url=self.base_url,
model=self.model,
eval_name="mmlu",
num_examples=64,
num_threads=32,
)
metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.5)
def test_mgsm_en(self):
args = SimpleNamespace(
base_url=self.base_url,

View File

@@ -130,6 +130,7 @@ suites = {
TestFile("test_modelopt_loader.py", 30),
],
"per-commit-2-gpu": [
TestFile("ep/test_moe_ep.py", 140),
TestFile("lora/test_lora_tp.py", 116),
TestFile("rl/test_update_weights_from_distributed.py", 103),
TestFile("test_data_parallelism.py", 73),