# Patch summary (free text ahead of the first file header; patch tools skip it).
#
# Files touched:
#   sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
#   sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu
#
# What changes:
#   * The guard `#if defined CUDA_VERSION && CUDA_VERSION >= 12040` is inverted
#     to `#if !defined(CUDA_VERSION) || CUDA_VERSION < 12040 ... #else`.
#   * The new first branch defines stubs for cutlass_mla_decode,
#     cutlass_mla_get_workspace_size and ApplyTokenBitmaskInplace whose bodies
#     are a single TORCH_CHECK(false, "...") naming the CUDA >= 12.4 requirement.
#     The existing code stays in the #else branch.
#   * `// clang-format off` is added above the guard in cutlass_mla_kernel.cu and
#     `// clang-format on` is appended after the closing #endif of both files.
#     In apply_token_bitmask_inplace_cuda.cu the `// clang-format on` that
#     followed the includes is removed.
#
# NOTE(review): this copy of the patch had been collapsed onto one line. Line
#   breaks were rebuilt from the +/-/context markers and agree with every hunk's
#   line counts, but the exact position of blank context lines and all leading
#   indentation are best-effort -- confirm against the target files.
# NOTE(review): the `#include` context lines have lost their header operands
#   (angle-bracketed text was stripped). They must be restored from the target
#   files before this patch can apply.
# NOTE(review): `at::optional<at::Tensor>` in the ApplyTokenBitmaskInplace stub
#   was restored from a bare `at::optional`, presumably stripped the same way --
#   confirm against the function's declaration.
# NOTE(review): the int64_t stub has no return statement and relies on
#   TORCH_CHECK(false, ...) never returning; check for missing-return warnings.
#
diff --git a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
index 46ad440c5..53219ea86 100644
--- a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
+++ b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
@@ -25,7 +25,21 @@ limitations under the License.
 #include
 #include
 
-#if defined CUDA_VERSION && CUDA_VERSION >= 12040
+// clang-format off
+#if !defined(CUDA_VERSION) || CUDA_VERSION < 12040
+void cutlass_mla_decode(
+    torch::Tensor const& out,
+    torch::Tensor const& q_nope_and_q_pe,
+    torch::Tensor const& kv_c_and_k_pe_cache,
+    torch::Tensor const& seq_lens,
+    torch::Tensor const& page_table,
+    torch::Tensor const& workspace) {
+  TORCH_CHECK(false, "CUDA version must be >= 12.4 for cutlass_mla_decode");
+}
+int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches, int64_t sm_count) {
+  TORCH_CHECK(false, "CUDA version must be >= 12.4 for cutlass_mla_get_workspace_size");
+}
+#else
 
 #define CUTLASS_CHECK(status) \
 { \
@@ -209,3 +223,4 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches,
 }
 
 #endif
+// clang-format on
diff --git a/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu b/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu
index b36846531..a5d954e7f 100644
--- a/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu
+++ b/sgl-kernel/csrc/grammar/apply_token_bitmask_inplace_cuda.cu
@@ -24,9 +24,12 @@
 #include
 #include
 #include
-// clang-format on
 
-#if defined CUDA_VERSION && CUDA_VERSION >= 12040
+#if !defined(CUDA_VERSION) || CUDA_VERSION < 12040
+void ApplyTokenBitmaskInplace(at::Tensor logits, at::Tensor bitmask, at::optional<at::Tensor> indices = at::nullopt) {
+  TORCH_CHECK(false, "CUDA version must be >= 12.4 for ApplyTokenBitmaskInplace");
+}
+#else
 
 #ifndef CUDART_INF_FP16
 #define CUDART_INF_FP16 __ushort_as_half((unsigned short)0x7C00U)
@@ -252,3 +255,4 @@ void ApplyTokenBitmaskInplace(at::Tensor logits, at::Tensor bitmask, at::optiona
   }
 }
 #endif
+// clang-format on