[Bug] Fix w4afp8 moe kernel (#9392)
This commit is contained in:
@@ -1488,6 +1488,10 @@ struct CollectiveMmaArrayMixedInput<
|
||||
template <class... TMs>
|
||||
CUTLASS_DEVICE void
|
||||
tensormaps_cp_fence_release(TensorMapStorage& shared_tensormaps, cute::tuple<TMs...> const& input_tensormaps) {
|
||||
if (cute::elect_one_sync()) {
|
||||
cute::tma_desc_commit_group();
|
||||
cute::tma_desc_wait_group();
|
||||
}
|
||||
// Entire warp must do this (i.e. it's aligned)
|
||||
tma_descriptor_cp_fence_release(get<0>(input_tensormaps), shared_tensormaps.smem_tensormap_A);
|
||||
tma_descriptor_cp_fence_release(get<1>(input_tensormaps), shared_tensormaps.smem_tensormap_B);
|
||||
|
||||
Reference in New Issue
Block a user