From 8cdc76f6d4cd61ced1d84a44c243b8a89e0a1f74 Mon Sep 17 00:00:00 2001 From: kk <43161300+kkHuang-amd@users.noreply.github.com> Date: Thu, 3 Oct 2024 00:52:46 +0800 Subject: [PATCH] [Performance, Hardware] MoE tuning on AMD MI300x GPUs (#1554) Co-authored-by: wunhuang --- ...name=AMD_Instinct_MI300X,dtype=float8.json | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 python/sglang/srt/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json diff --git a/python/sglang/srt/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json b/python/sglang/srt/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json new file mode 100644 index 000000000..00434d8e3 --- /dev/null +++ b/python/sglang/srt/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=float8.json @@ -0,0 +1,57 @@ +{ + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 0, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "16": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 0, + "waves_per_eu": 2, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 0, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 0, + "waves_per_eu": 2, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32768": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 0, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + } +}