From fa3592cfeb9d7d008e31719fdf68f3dc7d5ad9dd Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Sun, 8 Jun 2025 20:01:34 +0800 Subject: [PATCH] rebase h20 fused_moe config (#6966) --- benchmark/kernels/fused_moe_triton/README.md | 2 +- .../layers/moe/fused_moe_triton/configs/{README => README.md} | 0 ...e_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...e_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{README => README.md} (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) diff --git a/benchmark/kernels/fused_moe_triton/README.md b/benchmark/kernels/fused_moe_triton/README.md index 573e0ffaa..48598854a 100644 --- a/benchmark/kernels/fused_moe_triton/README.md +++ b/benchmark/kernels/fused_moe_triton/README.md @@ -42,7 +42,7 @@ python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \ --tune ``` -After tuning, a configuration file (e.g., `E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json`) will be generated in the current directory. You can move this file to `sglang/srt/layers/fused_moe_triton/configs/` to use it in `sglang`. +After tuning, a configuration file (e.g., `E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json`) will be generated in the current directory. You can move this file to `sglang/srt/layers/fused_moe_triton/configs/triton_version` dir to use it in `sglang`. ### Performance Comparison Tool diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/README b/python/sglang/srt/layers/moe/fused_moe_triton/configs/README.md similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/README rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/README.md diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json