diff --git a/python/sglang/srt/layers/quantization/unquant.py b/python/sglang/srt/layers/quantization/unquant.py index 495beb009..fd7a3b3d7 100644 --- a/python/sglang/srt/layers/quantization/unquant.py +++ b/python/sglang/srt/layers/quantization/unquant.py @@ -1,6 +1,5 @@ from __future__ import annotations -import importlib.util from typing import TYPE_CHECKING, List, Optional import torch @@ -31,8 +30,6 @@ if TYPE_CHECKING: StandardDispatchOutput, ) -has_triton_kernels = importlib.util.find_spec("triton_kernels") is not None - _is_cpu_amx_available = cpu_has_amx_support() _is_hip = is_hip() @@ -143,7 +140,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): self.triton_kernel_moe_forward = None self.triton_kernel_moe_with_bias_forward = None - if torch.cuda.is_available() and has_triton_kernels: + if torch.cuda.is_available() and use_triton_kernels: from sglang.srt.layers.moe.fused_moe_triton.triton_kernels_moe import ( triton_kernel_moe_forward as _tk_forward, )