Fix deepseek-v3 with torch.compile in PyTorch 2.6. (#5213)
This commit is contained in:
@@ -177,7 +177,8 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
    */
   m.def(
       "bmm_fp8(Tensor A, Tensor B, Tensor! D, Tensor A_scale, Tensor B_scale, Tensor workspace_buffer, int "
-      "cublas_handle, int cuda_stream) -> ()");
+      "cublas_handle, int cuda_stream) -> ()",
+      {at::Tag::needs_fixed_stride_order});
   m.impl("bmm_fp8", torch::kCUDA, &bmm_fp8);
 
   m.def(
Reference in New Issue
Block a user