From 3e4794aad8a2349c1ac3d98d2c6369c637fade18 Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Sun, 13 Apr 2025 01:01:13 +0800 Subject: [PATCH] refine fused_moe tuning docs (#5294) --- benchmark/kernels/fused_moe_triton/README.md | 12 ++++++++++-- .../srt/layers/moe/fused_moe_triton/fused_moe.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/benchmark/kernels/fused_moe_triton/README.md b/benchmark/kernels/fused_moe_triton/README.md index 2a3e37f68..7cfc66eaa 100644 --- a/benchmark/kernels/fused_moe_triton/README.md +++ b/benchmark/kernels/fused_moe_triton/README.md @@ -8,6 +8,11 @@ This directory contains benchmarking tools for MoE (Mixture of Experts) kernels. Example usage: ```bash +# Tune Mixtral-8x7B with default settings +python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \ + --model mistralai/Mixtral-8x7B-Instruct-v0.1 \ + --tune + # Tune Qwen2-57B with FP8 and TP=4 python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \ --model Qwen/Qwen2-57B-A14B-Instruct \ @@ -15,9 +20,12 @@ python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \ --dtype fp8_w8a8 \ --tune -# Tune Mixtral-8x7B with default settings +# Tune DeepSeek-V3 with FP8, TP=8 and n_share_experts_fusion=8 python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \ - --model mistralai/Mixtral-8x7B-Instruct-v0.1 \ + --model deepseek-ai/DeepSeek-V3-0324 \ + --tp-size 8 \ + --n-share-experts-fusion 8 \ + --dtype fp8_w8a8 \ --tune ``` diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py index 5d3e319e5..282cfbf06 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py @@ -956,7 +956,7 @@ def get_moe_configs( logger.warning( ( "Using default MoE config. Performance might be sub-optimal! 
" - "Config file not found at %s" + "Config file not found at %s, you can tune the config with https://github.com/sgl-project/sglang/blob/main/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py." ), config_file_path, )