From fbb5f229d46fa2e8b0c363705f950258bb39b5f8 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Wed, 23 Apr 2025 01:36:26 -0700
Subject: [PATCH] fix awq_dequantize import (#5669)

---
 python/sglang/srt/layers/quantization/awq.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/awq.py b/python/sglang/srt/layers/quantization/awq.py
index b14807ee4..9f14ac4c1 100644
--- a/python/sglang/srt/layers/quantization/awq.py
+++ b/python/sglang/srt/layers/quantization/awq.py
@@ -3,7 +3,6 @@ import logging
 from typing import Any, Dict, List, Optional
 
 import torch
-from sgl_kernel import awq_dequantize
 
 from sglang.srt.layers.linear import (
     LinearBase,
@@ -12,6 +11,11 @@ from sglang.srt.layers.linear import (
 )
 from sglang.srt.layers.parameter import GroupQuantScaleParameter, PackedvLLMParameter
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.utils import is_cuda
+
+_is_cuda = is_cuda()
+if _is_cuda:
+    from sgl_kernel import awq_dequantize
 
 logger = logging.getLogger(__name__)
 