[5/N] MoE Refactor: Update MoE parallelism arguments (#8658)

This commit is contained in:
Cheng Wan
2025-08-01 01:20:03 -07:00
committed by GitHub
parent c8d3a402c1
commit 6c88f6c8d9
38 changed files with 342 additions and 299 deletions

View File

@@ -407,9 +407,8 @@ class Test10(CustomTestCase):
"--trust-remote-code",
"--tp",
"8",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -449,9 +448,8 @@ class Test11(CustomTestCase):
"--enable-dp-attention",
"--dp",
"4",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -491,9 +489,8 @@ class Test12(CustomTestCase):
"--enable-dp-attention",
"--dp",
"8",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -532,9 +529,8 @@ class Test13(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -576,9 +572,8 @@ class Test14(CustomTestCase):
"4",
"--moe-dense-tp-size",
"1",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -620,9 +615,8 @@ class Test15(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -663,9 +657,8 @@ class Test16(CustomTestCase):
"--dp",
"4",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -706,9 +699,8 @@ class Test17(CustomTestCase):
"--dp",
"8",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -751,9 +743,8 @@ class Test18(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -796,9 +787,8 @@ class Test19(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"128",
],
@@ -835,7 +825,8 @@ class Test20(CustomTestCase):
"--trust-remote-code",
"--tp",
"8",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -873,7 +864,8 @@ class Test21(CustomTestCase):
"--enable-dp-attention",
"--dp",
"4",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -911,7 +903,8 @@ class Test22(CustomTestCase):
"--enable-dp-attention",
"--dp",
"8",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -948,7 +941,8 @@ class Test23(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -988,7 +982,8 @@ class Test24(CustomTestCase):
"4",
"--moe-dense-tp-size",
"1",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -1028,7 +1023,8 @@ class Test25(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -1067,7 +1063,8 @@ class Test26(CustomTestCase):
"--dp",
"4",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -1106,7 +1103,8 @@ class Test27(CustomTestCase):
"--dp",
"8",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -1147,7 +1145,8 @@ class Test28(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -1188,7 +1187,8 @@ class Test29(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
],
)
@@ -1701,9 +1701,8 @@ class Test40(CustomTestCase):
"--trust-remote-code",
"--tp",
"8",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -1755,9 +1754,8 @@ class Test41(CustomTestCase):
"--enable-dp-attention",
"--dp",
"4",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -1809,9 +1807,8 @@ class Test42(CustomTestCase):
"--enable-dp-attention",
"--dp",
"8",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -1862,9 +1859,8 @@ class Test43(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -1918,9 +1914,8 @@ class Test44(CustomTestCase):
"4",
"--moe-dense-tp-size",
"1",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -1974,9 +1969,8 @@ class Test45(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -2029,9 +2023,8 @@ class Test46(CustomTestCase):
"--dp",
"4",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -2084,9 +2077,8 @@ class Test47(CustomTestCase):
"--dp",
"8",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -2141,9 +2133,8 @@ class Test48(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -2198,9 +2189,8 @@ class Test49(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-deepep-moe",
"--deepep-mode",
"auto",
"--moe-a2a-backend",
"deepep",
"--cuda-graph-max-bs",
"32",
"--max-running-requests",
@@ -2249,7 +2239,8 @@ class Test50(CustomTestCase):
"--trust-remote-code",
"--tp",
"8",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2297,7 +2288,8 @@ class Test51(CustomTestCase):
"--enable-dp-attention",
"--dp",
"4",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2345,7 +2337,8 @@ class Test52(CustomTestCase):
"--enable-dp-attention",
"--dp",
"8",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2392,7 +2385,8 @@ class Test53(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2442,7 +2436,8 @@ class Test54(CustomTestCase):
"4",
"--moe-dense-tp-size",
"1",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2492,7 +2487,8 @@ class Test55(CustomTestCase):
"8",
"--moe-dense-tp-size",
"1",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2541,7 +2537,8 @@ class Test56(CustomTestCase):
"--dp",
"4",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2590,7 +2587,8 @@ class Test57(CustomTestCase):
"--dp",
"8",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2641,7 +2639,8 @@ class Test58(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",
@@ -2692,7 +2691,8 @@ class Test59(CustomTestCase):
"--moe-dense-tp-size",
"1",
"--enable-dp-lm-head",
"--enable-ep-moe",
"--ep",
"8",
"--speculative-algo",
"EAGLE",
"--speculative-draft",