From f78b7fd16dbfe32c2ee73c1f3fef49fc1257b27f Mon Sep 17 00:00:00 2001 From: Yuhao Yao <37280700+yuhyao@users.noreply.github.com> Date: Wed, 3 Sep 2025 18:28:27 +0800 Subject: [PATCH] [1/N][Bug] Fix w4afp8 MoE NaN issue (sgl-kernel) (#9953) --- sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh b/sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh index 9bc45ab1c..92cd58fed 100644 --- a/sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh +++ b/sgl-kernel/csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cuh @@ -41,8 +41,8 @@ using MmaType = cutlass::float_e4m3_t; // FP8 e4m3 type using QuantType = cutlass::int4b_t; // 4-bit integer type using ElementAccumulator = float; // Accumulator type using ElementScale = cutlass::bfloat16_t; // Scale type -using ElementC = cutlass::half_t; // Default output type (FP16) -using ElementD = ElementC; // Default output type (FP16) +using ElementC = cutlass::bfloat16_t; // Output type +using ElementD = ElementC; // Output type using ProblemShape = cutlass::gemm::GroupProblemShape<Shape<int, int, int>>; // Architecture-specific configurations