From 76f44c2a8d576fd2fea7fc7c2f25030bcb98374a Mon Sep 17 00:00:00 2001 From: Richard Zou Date: Thu, 10 Apr 2025 12:14:38 -0400 Subject: [PATCH] Fix deepseek-v3 with torch.compile in PyTorch 2.6. (#5213) --- sgl-kernel/csrc/common_extension.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sgl-kernel/csrc/common_extension.cc b/sgl-kernel/csrc/common_extension.cc index a299ba0ff..ea9060972 100644 --- a/sgl-kernel/csrc/common_extension.cc +++ b/sgl-kernel/csrc/common_extension.cc @@ -177,7 +177,8 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) { */ m.def( "bmm_fp8(Tensor A, Tensor B, Tensor! D, Tensor A_scale, Tensor B_scale, Tensor workspace_buffer, int " - "cublas_handle, int cuda_stream) -> ()"); + "cublas_handle, int cuda_stream) -> ()", + {at::Tag::needs_fixed_stride_order}); m.impl("bmm_fp8", torch::kCUDA, &bmm_fp8); m.def(