[Bug] Fix w4afp8 moe kernel (#9392)
This commit is contained in:
@@ -1488,6 +1488,10 @@ struct CollectiveMmaArrayMixedInput<
|
|||||||
template <class... TMs>
|
template <class... TMs>
|
||||||
CUTLASS_DEVICE void
|
CUTLASS_DEVICE void
|
||||||
tensormaps_cp_fence_release(TensorMapStorage& shared_tensormaps, cute::tuple<TMs...> const& input_tensormaps) {
|
tensormaps_cp_fence_release(TensorMapStorage& shared_tensormaps, cute::tuple<TMs...> const& input_tensormaps) {
|
||||||
|
if (cute::elect_one_sync()) {
|
||||||
|
cute::tma_desc_commit_group();
|
||||||
|
cute::tma_desc_wait_group();
|
||||||
|
}
|
||||||
// Entire warp must do this (i.e. it's aligned)
|
// Entire warp must do this (i.e. it's aligned)
|
||||||
tma_descriptor_cp_fence_release(get<0>(input_tensormaps), shared_tensormaps.smem_tensormap_A);
|
tma_descriptor_cp_fence_release(get<0>(input_tensormaps), shared_tensormaps.smem_tensormap_A);
|
||||||
tma_descriptor_cp_fence_release(get<1>(input_tensormaps), shared_tensormaps.smem_tensormap_B);
|
tma_descriptor_cp_fence_release(get<1>(input_tensormaps), shared_tensormaps.smem_tensormap_B);
|
||||||
|
|||||||
Reference in New Issue
Block a user