From 6737671c82cd654dc052b3ffd7ddfcce73dfbe90 Mon Sep 17 00:00:00 2001 From: Even Zhou Date: Sat, 19 Jul 2025 02:34:55 +0800 Subject: [PATCH] [Bugfix] Fix w8a8_int8 import error on NPU (#8147) --- python/sglang/srt/layers/quantization/w8a8_int8.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sglang/srt/layers/quantization/w8a8_int8.py b/python/sglang/srt/layers/quantization/w8a8_int8.py index c9af7ae29..19cf49c9b 100644 --- a/python/sglang/srt/layers/quantization/w8a8_int8.py +++ b/python/sglang/srt/layers/quantization/w8a8_int8.py @@ -754,6 +754,8 @@ class NPU_W8A8LinearMethod(LinearMethodBase): x: torch.Tensor, bias: Optional[torch.Tensor] = None, ) -> torch.Tensor: + from sglang.srt.layers.linear import RowParallelLinear + if isinstance(layer, RowParallelLinear): tp_rank = get_tensor_model_parallel_rank() return self.quant_method.apply(layer, x, bias, tp_rank)