From 2a413829f42b8e8433a3e7cfd91cc9cb241cfbc0 Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Sat, 7 Jun 2025 17:43:50 +0800 Subject: [PATCH] Add triton version as a fused_moe_triton config search key to avoid performace decrease in different Triton version (#5955) --- .../srt/layers/moe/fused_moe_triton/configs/README | 3 +++ ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 ...vice_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 0 .../E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json | 0 .../E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json | 0 .../E=16,N=1024,device_name=NVIDIA_H200.json | 0 .../E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json | 0 .../E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json | 0 .../E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 ...vice_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 ...vice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json | 0 .../E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...vice_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json | 0 .../E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json | 0 .../E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json | 0 ...evice_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json | 0 ...SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json | 0 ...evice_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json | 0 ..._80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...28,device_name=NVIDIA_H20,block_shape=[128, 128].json | 0 ...NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...nct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...nct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...n_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json | 0 ...=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json | 0 ...256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json | 0 ..._80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=64,N=1280,device_name=NVIDIA_H200.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 ...64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=64,N=2560,device_name=NVIDIA_H200.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=64,N=320,device_name=NVIDIA_H200.json | 0 ...VIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json | 0 ...vice_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=64,N=640,device_name=NVIDIA_H200.json | 0 .../E=8,N=14336,device_name=AMD_Instinct_MI300X.json | 0 .../E=8,N=14336,device_name=AMD_Instinct_MI325X.json | 0 .../E=8,N=14336,device_name=AMD_Radeon_Graphics.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 ...8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=8,N=14336,device_name=NVIDIA_H200.json | 0 .../E=8,N=1792,device_name=AMD_Instinct_MI300X.json | 0 .../E=8,N=1792,device_name=AMD_Instinct_MI325X.json | 0 .../E=8,N=1792,device_name=AMD_Radeon_Graphics.json | 0 .../E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json | 0 .../E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json | 0 .../E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=8,N=1792,device_name=NVIDIA_H200.json | 0 .../E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=8,N=2048,device_name=NVIDIA_H200.json | 0 .../E=8,N=3584,device_name=AMD_Instinct_MI300X.json | 0 .../E=8,N=3584,device_name=AMD_Instinct_MI325X.json | 0 .../E=8,N=3584,device_name=AMD_Radeon_Graphics.json | 0 .../E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json | 0 .../E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...vice_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=8,N=3584,device_name=NVIDIA_H200.json | 0 .../E=8,N=3584,device_name=NVIDIA_L40S.json | 0 ...6,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json | 0 ...6,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json | 0 ...6,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json | 0 .../E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=8,N=4096,device_name=NVIDIA_H200.json | 0 .../E=8,N=7168,device_name=AMD_Instinct_MI300X.json | 0 .../E=8,N=7168,device_name=AMD_Instinct_MI325X.json | 0 .../E=8,N=7168,device_name=AMD_Radeon_Graphics.json | 0 .../E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 .../E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=8,N=7168,device_name=NVIDIA_H200.json | 0 ...2,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json | 0 ...2,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json | 0 ...2,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json | 0 ...device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json | 0 ...=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json | 0 .../E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json | 0 .../E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json | 0 .../E=128,N=192,device_name=NVIDIA_H20.json | 0 .../E=128,N=192,device_name=NVIDIA_H200.json | 0 .../E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=128,N=384,device_name=NVIDIA_H20.json | 0 ...VIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=128,N=384,device_name=NVIDIA_H200.json | 0 .../E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json | 0 .../E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json | 0 .../E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json | 0 ...NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=128,N=768,device_name=NVIDIA_H20.json | 0 ...VIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=128,N=768,device_name=NVIDIA_H200.json | 0 .../E=128,N=96,device_name=NVIDIA_H20.json | 0 ..._80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...evice_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json | 0 ...VIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...VIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...evice_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json | 0 ..._80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 ...NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json | 0 .../E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json | 0 .../E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json | 0 .../sglang/srt/layers/moe/fused_moe_triton/fused_moe.py | 9 ++++++++- 158 files changed, 11 insertions(+), 1 deletion(-) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1024,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=1280,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=2560,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=320,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=64,N=640,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=14336,device_name=AMD_Instinct_MI300X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=14336,device_name=AMD_Instinct_MI325X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=14336,device_name=AMD_Radeon_Graphics.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=14336,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=AMD_Instinct_MI300X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=AMD_Instinct_MI325X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=AMD_Radeon_Graphics.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=1792,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=2048,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=AMD_Instinct_MI300X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=AMD_Instinct_MI325X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=AMD_Radeon_Graphics.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=3584,device_name=NVIDIA_L40S.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=4096,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=AMD_Instinct_MI300X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=AMD_Instinct_MI325X.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=AMD_Radeon_Graphics.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=7168,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_1_0}/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=192,device_name=NVIDIA_H20.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=192,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=384,device_name=NVIDIA_H20.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=384,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=768,device_name=NVIDIA_H20.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=768,device_name=NVIDIA_H200.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=128,N=96,device_name=NVIDIA_H20.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json (100%) rename python/sglang/srt/layers/moe/fused_moe_triton/configs/{ => triton_3_2_0}/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json (100%) diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/README b/python/sglang/srt/layers/moe/fused_moe_triton/configs/README index 4aa527f27..3679e698a 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/configs/README +++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/README @@ -3,6 +3,9 @@ For different settings of - E (number of experts) - N (intermediate size) - device_name (torch.cuda.get_device_name()) +- dtype: The data type used by the fused MoE kernel for computation. Supported types include fp8_w8a8, int8_w8a8, int8_w8a16, int4_w4a16, etc. This determines the precision and quantization scheme for both weights and activations. +- block_shape: The block quantization shape introduced starting from DeepSeek V3/R1 models. This parameter defines the granularity for block-wise quantization, typically specified as `[block_n, block_k]` where `block_n` and `block_k` represent the block dimensions. For example, DeepSeek V3 commonly uses `[128, 128]` block shapes for efficient block-wise FP8 quantization. + the JSON file contains a mapping from M (batch size) to the chosen configuration. The example configurations provided are for the Mixtral model for TP2 on H100 diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H20.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H20.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=96,device_name=NVIDIA_H20.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=96,device_name=NVIDIA_H20.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json similarity index 100% rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py index bd1432d38..df4a490e4 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py @@ -936,8 +936,15 @@ def get_moe_configs( # directory json_file_name = get_config_file_name(E, N, dtype, [block_n, block_k]) + # We found that using the fused_moe_kernel config from Triton 3.1.0 with Triton 3.2.0 results in negative performance gains, + # so we also include the Triton version as a key for finding the fused_moe_kernel config to achieve the best performance. + triton_version = triton.__version__ + version_dir = f"triton_{triton_version.replace('.', '_')}" config_file_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name + os.path.dirname(os.path.realpath(__file__)), + "configs", + version_dir, + json_file_name, ) if os.path.exists(config_file_path): with open(config_file_path) as f: