From a8ba32798e1e17ed54a8ea4d052596f5f9a63173 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:14:47 +0800 Subject: [PATCH] Fix triton_kernels import error on some hardwares (#11831) --- python/sglang/srt/layers/quantization/unquant.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/sglang/srt/layers/quantization/unquant.py b/python/sglang/srt/layers/quantization/unquant.py index 495beb009..fd7a3b3d7 100644 --- a/python/sglang/srt/layers/quantization/unquant.py +++ b/python/sglang/srt/layers/quantization/unquant.py @@ -1,6 +1,5 @@ from __future__ import annotations -import importlib.util from typing import TYPE_CHECKING, List, Optional import torch @@ -31,8 +30,6 @@ if TYPE_CHECKING: StandardDispatchOutput, ) -has_triton_kernels = importlib.util.find_spec("triton_kernels") is not None - _is_cpu_amx_available = cpu_has_amx_support() _is_hip = is_hip() @@ -143,7 +140,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): self.triton_kernel_moe_forward = None self.triton_kernel_moe_with_bias_forward = None - if torch.cuda.is_available() and has_triton_kernels: + if torch.cuda.is_available() and use_triton_kernels: from sglang.srt.layers.moe.fused_moe_triton.triton_kernels_moe import ( triton_kernel_moe_forward as _tk_forward, )