From 7ca751ff7d8ddb341cb22cf000c0e8ce5fc1bb4d Mon Sep 17 00:00:00 2001 From: kk <43161300+kkHuang-amd@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:38:22 +0800 Subject: [PATCH] Fused moe triton cfg opt for rocm (#2612) Co-authored-by: wunhuang --- ...me=AMD_Instinct_MI300X,dtype=fp8_w8a8.json | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json index abf258e56..bb17743b6 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json @@ -55,35 +55,35 @@ "kpack": 2 }, "128": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, - "num_warps": 4, + "num_warps": 8, "num_stages": 0, - "waves_per_eu": 1, + "waves_per_eu": 0, "matrix_instr_nonkdim": 16, "kpack": 1 }, "256": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, - "num_warps": 4, + "num_warps": 8, "num_stages": 0, - "waves_per_eu": 1, + "waves_per_eu": 0, "matrix_instr_nonkdim": 16, "kpack": 1 }, "512": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, - "num_warps": 4, + "num_warps": 8, "num_stages": 0, - "waves_per_eu": 2, + "waves_per_eu": 0, "matrix_instr_nonkdim": 16, "kpack": 2 },