[5/N] MoE Refactor: Update MoE parallelism arguments (#8658)

This commit is contained in:
Cheng Wan
2025-08-01 01:20:03 -07:00
committed by GitHub
parent c8d3a402c1
commit 6c88f6c8d9
38 changed files with 342 additions and 299 deletions

View File

@@ -28,9 +28,8 @@ spec:
 - --enable-dp-lm-head
 - --dp-size
 - "16"
-- --enable-deepep-moe
-- --deepep-mode
-- low_latency
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - decode
 - --mem-fraction-static
@@ -166,9 +165,8 @@ spec:
 - --enable-dp-lm-head
 - --dp-size
 - "16"
-- --enable-deepep-moe
-- --deepep-mode
-- low_latency
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - decode
 - --mem-fraction-static

View File

@@ -38,9 +38,8 @@ spec:
 - --dp-size
 - "16"
 - --disable-radix-cache
-- --enable-deepep-moe
-- --deepep-mode
-- normal
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - prefill
 - --mem-fraction-static
@@ -184,9 +183,8 @@ spec:
 - --dp-size
 - "16"
 - --disable-radix-cache
-- --enable-deepep-moe
-- --deepep-mode
-- normal
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - prefill
 - --mem-fraction-static

View File

@@ -64,9 +64,8 @@ spec:
 - --dp-size
 - "16"
 - --disable-radix-cache
-- --enable-deepep-moe
-- --deepep-mode
-- normal
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - prefill
 - --mem-fraction-static
@@ -212,9 +211,8 @@ spec:
 - --dp-size
 - "16"
 - --disable-radix-cache
-- --enable-deepep-moe
-- --deepep-mode
-- normal
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - prefill
 - --mem-fraction-static
@@ -373,9 +371,8 @@ spec:
 - --enable-dp-lm-head
 - --dp-size
 - "16"
-- --enable-deepep-moe
-- --deepep-mode
-- low_latency
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - decode
 - --mem-fraction-static
@@ -508,9 +505,8 @@ spec:
 #- --enable-two-batch-overlap
 - --dp-size
 - "16"
-- --enable-deepep-moe
-- --deepep-mode
-- low_latency
+- --moe-a2a-backend
+- deepep
 - --disaggregation-mode
 - decode
 - --mem-fraction-static