refine fused_moe tuning docs (#5294)
@@ -8,6 +8,11 @@ This directory contains benchmarking tools for MoE (Mixture of Experts) kernels.

Example usage:
```bash
# Tune Mixtral-8x7B with default settings
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
    --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
    --tune

# Tune Qwen2-57B with FP8 and TP=4
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
    --model Qwen/Qwen2-57B-A14B-Instruct \
@@ -15,9 +20,12 @@ python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
    --dtype fp8_w8a8 \
    --tune

-# Tune Mixtral-8x7B with default settings
+# Tune DeepSeek-V3 with FP8, TP=8 and n_share_experts_fusion=8
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
-    --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
+    --model deepseek-ai/DeepSeek-V3-0324 \
+    --tp-size 8 \
+    --n-share-experts-fusion 8 \
    --dtype fp8_w8a8 \
    --tune
```
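For context, a `--tune` run produces a per-device JSON config that the fused MoE kernel later loads. The sketch below assumes the usual layout of those files (batch-size keys mapping to Triton launch parameters); the file name in the comment, the parameter values, and the `pick_config` helper are illustrative placeholders, not tuned results or sglang's exact API.

```python
# Minimal sketch: what a tuned fused_moe config roughly looks like and how the
# entry closest to the current batch size could be selected. Values are placeholders.
import json

example_config = {
    # batch size (M) -> Triton launch parameters (placeholder values, not tuned output)
    "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128,
          "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 3},
    "64": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128,
           "GROUP_SIZE_M": 8, "num_warps": 8, "num_stages": 4},
}

def pick_config(configs: dict, m: int) -> dict:
    """Pick the entry tuned for the batch size closest to m."""
    best_key = min(configs, key=lambda k: abs(int(k) - m))
    return configs[best_key]

if __name__ == "__main__":
    # A real file would be named per device, e.g. something like
    # "E=...,N=...,device_name=...,dtype=fp8_w8a8.json" (name is an assumption here).
    print(json.dumps(pick_config(example_config, 32), indent=2))
```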
@@ -956,7 +956,7 @@ def get_moe_configs(
    logger.warning(
        (
            "Using default MoE config. Performance might be sub-optimal! "
-            "Config file not found at %s"
+            "Config file not found at %s, you can tune the config with https://github.com/sgl-project/sglang/blob/main/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py."
        ),
        config_file_path,
    )
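The warning changed above fires on the lookup fallback path, when no tuned config exists for the current device and shape. A simplified sketch of that path follows; `load_moe_config` is a hypothetical helper, not sglang's actual function, and the path construction is assumed for illustration.

```python
# Simplified sketch (not sglang's exact code) of the fallback that emits the warning
# above: if the per-device config file is missing, log the expected path and point at
# the tuning script, then return None so the caller falls back to a default config.
import json
import logging
import os

logger = logging.getLogger(__name__)

def load_moe_config(config_file_path: str):
    if os.path.exists(config_file_path):
        with open(config_file_path) as f:
            return json.load(f)
    logger.warning(
        (
            "Using default MoE config. Performance might be sub-optimal! "
            "Config file not found at %s, you can tune the config with "
            "https://github.com/sgl-project/sglang/blob/main/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py."
        ),
        config_file_path,  # lazy %-substitution: formatted only if the record is emitted
    )
    return None  # caller falls back to a heuristic default config
```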