[Refactor] Rename n_share_experts_fusion as num_fused_shared_experts (#6735)
@@ -27,19 +27,17 @@ python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
     --dtype fp8_w8a8 \
     --tune
 
-# Tune DeepSeek-V3 with FP8, TP=8 and n_share_experts_fusion=8
+# Tune DeepSeek-V3 with FP8 and TP=8
 python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
     --model deepseek-ai/DeepSeek-V3-0324 \
     --tp-size 8 \
-    --n-share-experts-fusion 8 \
     --dtype fp8_w8a8 \
     --tune
 
-# Tune DeepSeek-R1 with channel-wise INT8, TP=16 and n_share_experts_fusion=16
+# Tune DeepSeek-R1 with channel-wise INT8 and TP=16
 python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
     --model meituan/DeepSeek-R1-Channel-INT8 \
     --tp-size 16 \
-    --n-share-experts-fusion 16 \
     --dtype int8_w8a8 \
     --tune
 ```
|
||||
@@ -65,11 +63,10 @@ python benchmark/kernels/fused_moe_triton/benchmark_vllm_vs_sglang_fused_moe_tri
     --model deepseek-ai/DeepSeek-V3-0324 \
     --tp-size 8
 
-# Compare with custom TP size and n_share_experts_fusion
+# Compare with custom TP size
 python benchmark/kernels/fused_moe_triton/benchmark_vllm_vs_sglang_fused_moe_triton.py \
     --model deepseek-ai/DeepSeek-V3-0324 \
-    --tp-size 8 \
-    --n-share-experts-fusion 8
+    --tp-size 8
 ```
 
 The benchmark results will be saved as plots and data files in the specified output directory (default: `./configs/benchmark_ops/vllm_sglang_fused_moe/`).
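The tuning and benchmark invocations above all share the same shape (script path, `--model`, `--tp-size`, `--dtype`, `--tune`). As a minimal illustrative sketch, not part of the repository, the helper below assembles such a command string from those parameters; the script path and flag names are taken verbatim from the commands shown above:

```python
import shlex

def build_tuning_cmd(model: str, tp_size: int, dtype: str) -> str:
    """Assemble a tuning command like the ones in the README above.

    Hypothetical helper for illustration only; the script path and flags
    are copied from the documented invocations.
    """
    args = [
        "python",
        "benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py",
        "--model", model,
        "--tp-size", str(tp_size),
        "--dtype", dtype,
        "--tune",
    ]
    # shlex.join quotes each token safely for a POSIX shell.
    return shlex.join(args)

# The DeepSeek-V3 FP8 example from the first hunk:
print(build_tuning_cmd("deepseek-ai/DeepSeek-V3-0324", 8, "fp8_w8a8"))
```

The same builder covers the INT8 DeepSeek-R1 case by passing `tp_size=16` and `dtype="int8_w8a8"`.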