[fix] fix enable_pdl for blackwell (#9011)

2025-08-19 01:16:08 +08:00
parent c480a3f6ea
commit 6cdcbcc674
2 changed files with 7 additions and 7 deletions
--- a/sgl-kernel/python/sgl_kernel/elementwise.py
+++ b/sgl-kernel/python/sgl_kernel/elementwise.py
@@ -2,7 +2,7 @@ from dataclasses import dataclass
 from typing import Optional

 import torch
-from sgl_kernel.utils import get_cuda_stream, is_hopper_arch
+from sgl_kernel.utils import get_cuda_stream, is_arch_support_pdl


 # These implementations extensively draw from and build upon the FlashInfer project https://github.com/flashinfer-ai/flashinfer
@@ -41,7 +41,7 @@ def rmsnorm(
    if out is None:
        out = torch.empty_like(input)
    if enable_pdl is None:
-        enable_pdl = is_hopper_arch()
+        enable_pdl = is_arch_support_pdl()
    torch.ops.sgl_kernel.rmsnorm.default(out, input, weight, eps, enable_pdl)
    return out

@@ -77,7 +77,7 @@ def fused_add_rmsnorm(
        If None, will be automatically enabled when ``is_arch_support_pdl()`` reports that the current architecture supports PDL.
    """
    if enable_pdl is None:
-        enable_pdl = is_hopper_arch()
+        enable_pdl = is_arch_support_pdl()
    torch.ops.sgl_kernel.fused_add_rmsnorm.default(
        input, residual, weight, eps, enable_pdl
    )
@@ -117,7 +117,7 @@ def gemma_rmsnorm(
    if out is None:
        out = torch.empty_like(input)
    if enable_pdl is None:
-        enable_pdl = is_hopper_arch()
+        enable_pdl = is_arch_support_pdl()
    torch.ops.sgl_kernel.gemma_rmsnorm.default(out, input, weight, eps, enable_pdl)
    return out

@@ -153,7 +153,7 @@ def gemma_fused_add_rmsnorm(
        If None, will be automatically enabled when ``is_arch_support_pdl()`` reports that the current architecture supports PDL.
    """
    if enable_pdl is None:
-        enable_pdl = is_hopper_arch()
+        enable_pdl = is_arch_support_pdl()
    torch.ops.sgl_kernel.gemma_fused_add_rmsnorm.default(
        input, residual, weight, eps, enable_pdl
    )