refine fused_moe tuning docs (#5294)
@@ -8,6 +8,11 @@ This directory contains benchmarking tools for MoE (Mixture of Experts) kernels.

Example usage:
```bash
# Tune Mixtral-8x7B with default settings
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
    --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
    --tune

# Tune Qwen2-57B with FP8 and TP=4
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
    --model Qwen/Qwen2-57B-A14B-Instruct \
@@ -15,9 +20,12 @@ python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
    --dtype fp8_w8a8 \
    --tune

-# Tune Mixtral-8x7B with default settings
+# Tune DeepSeek-V3 with FP8, TP=8 and n_share_experts_fusion=8
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
-    --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
+    --model deepseek-ai/DeepSeek-V3-0324 \
+    --tp-size 8 \
+    --n-share-experts-fusion 8 \
    --dtype fp8_w8a8 \
    --tune
```
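For context, a `--tune` run produces a per-device JSON config that the fused MoE kernel later loads. The sketch below assumes the usual layout of those files (batch-size keys mapping to Triton launch parameters); the file name in the comment, the parameter values, and the `pick_config` helper are illustrative placeholders, not tuned results or sglang's exact API.

```python
# Minimal sketch: what a tuned fused_moe config roughly looks like and how the
# entry closest to the current batch size could be selected. Values are placeholders.
import json

example_config = {
    # batch size (M) -> Triton launch parameters (placeholder values, not tuned output)
    "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128,
          "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 3},
    "64": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128,
           "GROUP_SIZE_M": 8, "num_warps": 8, "num_stages": 4},
}

def pick_config(configs: dict, m: int) -> dict:
    """Pick the entry tuned for the batch size closest to m."""
    best_key = min(configs, key=lambda k: abs(int(k) - m))
    return configs[best_key]

if __name__ == "__main__":
    # A real file would be named per device, e.g. something like
    # "E=...,N=...,device_name=...,dtype=fp8_w8a8.json" (name is an assumption here).
    print(json.dumps(pick_config(example_config, 32), indent=2))
```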
@@ -956,7 +956,7 @@ def get_moe_configs(
    logger.warning(
        (
            "Using default MoE config. Performance might be sub-optimal! "
-            "Config file not found at %s"
+            "Config file not found at %s, you can tune the config with https://github.com/sgl-project/sglang/blob/main/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py."
        ),
        config_file_path,
    )
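The warning changed above fires on the lookup fallback path, when no tuned config exists for the current device and shape. A simplified sketch of that path follows; `load_moe_config` is a hypothetical helper, not sglang's actual function, and the path construction is assumed for illustration.

```python
# Simplified sketch (not sglang's exact code) of the fallback that emits the warning
# above: if the per-device config file is missing, log the expected path and point at
# the tuning script, then return None so the caller falls back to a default config.
import json
import logging
import os

logger = logging.getLogger(__name__)

def load_moe_config(config_file_path: str):
    if os.path.exists(config_file_path):
        with open(config_file_path) as f:
            return json.load(f)
    logger.warning(
        (
            "Using default MoE config. Performance might be sub-optimal! "
            "Config file not found at %s, you can tune the config with "
            "https://github.com/sgl-project/sglang/blob/main/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py."
        ),
        config_file_path,  # lazy %-substitution: formatted only if the record is emitted
    )
    return None  # caller falls back to a heuristic default config
```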