diff --git a/python/sglang/srt/layers/attention/attention_registry.py b/python/sglang/srt/layers/attention/attention_registry.py index 2bf271c29..c89fe809c 100644 --- a/python/sglang/srt/layers/attention/attention_registry.py +++ b/python/sglang/srt/layers/attention/attention_registry.py @@ -183,6 +183,7 @@ def attn_backend_wrapper(runner: "ModelRunner", full_attn_backend: "AttentionBac ), "hybrid_gdn can only be used with non-MLA models." if cfg := runner.mambaish_config: + from sglang.srt.layers.attention.fla.utils import check_environments from sglang.srt.layers.attention.hybrid_linear_attn_backend import ( GDNAttnBackend, HybridLinearAttnBackend, @@ -190,6 +191,7 @@ def attn_backend_wrapper(runner: "ModelRunner", full_attn_backend: "AttentionBac ) from sglang.srt.utils import is_blackwell, is_npu + check_environments() if runner.hybrid_gdn_config is not None: if is_blackwell(): assert ( diff --git a/python/sglang/srt/layers/attention/fla/utils.py b/python/sglang/srt/layers/attention/fla/utils.py index 3caf70de5..8613d611d 100644 --- a/python/sglang/srt/layers/attention/fla/utils.py +++ b/python/sglang/srt/layers/attention/fla/utils.py @@ -58,9 +58,6 @@ def check_environments(): return None -check_environments() - - def get_abs_err(x, y): return (x.detach() - y.detach()).flatten().abs().max().item()