Make sm100 fp8 kernels available on sm103 (#9789)

Signed-off-by: Hao Lu <14827759+hlu1@users.noreply.github.com>
2025-08-28 23:47:29 -07:00
parent 09a1df2231
commit 7a16db9bd9
3 changed files with 16 additions and 4 deletions
--- a/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu
+++ b/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu
@@ -260,7 +260,11 @@ torch::Tensor fp8_blockwise_scaled_mm(

 #if defined(CUTLASS_ARCH_MMA_SM100A_SUPPORTED) || defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
 #if defined CUDA_VERSION && CUDA_VERSION >= 12080
-  if (sm_version == 100) {
+  if (sm_version == 100
+#if CUDA_VERSION >= 12090
+      || sm_version == 103
+#endif
+  ) {
    if (out_dtype == torch::kBFloat16) {
      sm100_fp8_blockwise_dispatch_shape<cutlass::bfloat16_t>(
          out_padded, mat_a_padded, mat_b, scales_a_padded, scales_b);