[5/N] MoE Refactor: Update MoE parallelism arguments (#8658)

This commit is contained in:
Cheng Wan
2025-08-01 01:20:03 -07:00
committed by GitHub
parent c8d3a402c1
commit 6c88f6c8d9
38 changed files with 342 additions and 299 deletions

View File

@@ -31,7 +31,8 @@ class TestPureDP(CustomTestCase):
 "--enable-dp-attention",
 "--dp",
 "4",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--cuda-graph-max-bs",
 "128",
 "--max-running-requests",
@@ -77,7 +78,8 @@ class TestHybridDPTP(CustomTestCase):
 "--enable-dp-attention",
 "--dp",
 "2",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--cuda-graph-max-bs",
 "128",
 "--max-running-requests",
@@ -118,7 +120,8 @@ class TestTP(CustomTestCase):
 "--trust-remote-code",
 "--tp",
 "4",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--cuda-graph-max-bs",
 "128",
 "--max-running-requests",
@@ -166,7 +169,8 @@ class TestNoGatherdBuffer(CustomTestCase):
 "--moe-dense-tp-size",
 "1",
 "--enable-dp-lm-head",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--cuda-graph-max-bs",
 "32",
 "--max-running-requests",
@@ -212,7 +216,8 @@ class TestTBO(CustomTestCase):
 "4",
 "--moe-dense-tp-size",
 "1",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--enable-two-batch-overlap",
 "--cuda-graph-max-bs",
 "128",
@@ -259,7 +264,8 @@ class TestMTP(CustomTestCase):
 "--dp",
 "2",
 "--enable-dp-lm-head",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--speculative-algo",
 "EAGLE",
 "--speculative-draft",
@@ -326,7 +332,8 @@ class TestMTPWithTBO(CustomTestCase):
 "--dp-size",
 "4",
 "--enable-two-batch-overlap",
-"--enable-deepep-moe",
+"--moe-a2a-backend",
+"deepep",
 "--trust-remote-code",
 "--speculative-algorithm",
 "EAGLE",