Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
This commit is contained in:
@@ -109,6 +109,7 @@ using FP8_TYPE = c10::Float8_e4m3fnuz;
|
|||||||
constexpr auto FP8_E4M3_MAX = 224.0f;
|
constexpr auto FP8_E4M3_MAX = 224.0f;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef USE_ROCM
|
||||||
__device__ __forceinline__ float atomicMaxFloat(float* addr, float value) {
|
__device__ __forceinline__ float atomicMaxFloat(float* addr, float value) {
|
||||||
float old;
|
float old;
|
||||||
old = (value >= 0) ? __int_as_float(atomicMax((int*)addr, __float_as_int(value)))
|
old = (value >= 0) ? __int_as_float(atomicMax((int*)addr, __float_as_int(value)))
|
||||||
@@ -124,3 +125,4 @@ __device__ __forceinline__ float warpReduceMax(float max_value) {
|
|||||||
max_value = fmaxf(max_value, __shfl_xor_sync(0xffffffff, max_value, 1));
|
max_value = fmaxf(max_value, __shfl_xor_sync(0xffffffff, max_value, 1));
|
||||||
return max_value;
|
return max_value;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user