[Fix] Illegal sync caused by undefined behaviour in the lane mask (#9620)

Signed-off-by: Devashish Lal <devashish@rivosinc.com>
Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
This commit is contained in:
DevashishLal-CB
2025-09-05 20:54:48 -07:00
committed by GitHub
parent ad26f298e2
commit dbb1235d58

View File

@@ -8,7 +8,7 @@
template <int THREADS_PER_SUBWARP>
__device__ __forceinline__ float GroupReduceMax(float val, const int tid) {
unsigned mask = 0xffff;
unsigned mask = threadIdx.x % 32 >= 16 ? 0xffff0000 : 0x0000ffff;
static_assert(
(THREADS_PER_SUBWARP & (THREADS_PER_SUBWARP - 1)) == 0 && THREADS_PER_SUBWARP <= 16 && THREADS_PER_SUBWARP >= 1,