diff --git a/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu b/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu index 9a99debb6..b36846531 100644 --- a/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu +++ b/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu @@ -26,6 +26,8 @@ #include // clang-format on +#if defined CUDA_VERSION && CUDA_VERSION >= 12040 + #ifndef CUDART_INF_FP16 #define CUDART_INF_FP16 __ushort_as_half((unsigned short)0x7C00U) #endif @@ -249,3 +251,4 @@ void ApplyTokenBitmaskInplace(at::Tensor logits, at::Tensor bitmask, at::optiona break; } } +#endif