Update CUTLASS. Refine KernelSchedule for fp8 (grouped) gemm. (#10491)

This commit is contained in:
Qi Yuhang
2025-09-16 17:47:37 +08:00
committed by GitHub
parent c0c6f543e4
commit 9b876889b7
3 changed files with 5 additions and 5 deletions

View File

@@ -72,7 +72,7 @@ struct cutlass_3x_gemm_fp8_blockwise {
using EpilogueTileType = cutlass::epilogue::collective::EpilogueTileAuto;
using StoreEpilogueCompute = typename cutlass::epilogue::fusion::Sm90EVT<cutlass::epilogue::fusion::Sm90AccFetch>;
- using KernelSchedule = cutlass::gemm::KernelTmaWarpSpecializedCooperativeFP8BlockScaledAccum;
+ using KernelSchedule = cutlass::gemm::KernelTmaWarpSpecializedCooperativeFP8Blockwise;
using CollectiveEpilogue = typename cutlass::epilogue::collective::CollectiveBuilder<
ArchTag,
OperatorClass,