[Bugfix] Fix w8a8_int8 import error on NPU (#8147)

This commit is contained in:
Even Zhou
2025-07-19 02:34:55 +08:00
committed by GitHub
parent fd63b62eaa
commit 6737671c82

View File

@@ -754,6 +754,8 @@ class NPU_W8A8LinearMethod(LinearMethodBase):
x: torch.Tensor,
bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
from sglang.srt.layers.linear import RowParallelLinear
if isinstance(layer, RowParallelLinear):
tp_rank = get_tensor_model_parallel_rank()
return self.quant_method.apply(layer, x, bias, tp_rank)