From d8a9cb845802949ddaae70cebf3aa80215fc4414 Mon Sep 17 00:00:00 2001
From: realliujiaxu
Date: Fri, 26 Sep 2025 10:55:32 +0800
Subject: [PATCH] [Bugfix] fix bug when tp=1 (#3193)

### What this PR does / why we need it?
Addresses a bug in SequenceRowParallelOp that occurs when tensor parallelism is not used: the unquantized/quantized `apply` was passed the op wrapper (`self`) instead of the underlying linear layer (`self.layer`) on the tp_size == 1 path, mismatching the reduce path below.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
- vLLM version: v0.10.2
- vLLM main: https://github.com/vllm-project/vllm/commit/52d0cb845866869d587fc013a7c59e60a86ebcf2
---
 vllm_ascend/ops/linear_op.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm_ascend/ops/linear_op.py b/vllm_ascend/ops/linear_op.py
index f6feadd..819af72 100644
--- a/vllm_ascend/ops/linear_op.py
+++ b/vllm_ascend/ops/linear_op.py
@@ -390,7 +390,9 @@ class SequenceRowParallelOp(CustomRowParallelOp):
         bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias
 
         if self.tp_size == 1 or not self.reduce_results:
-            output = self.quant_method.apply(self, input_parallel, bias=bias_)
+            output = self.quant_method.apply(self.layer,
+                                             input_parallel,
+                                             bias=bias_)
         else:
             output_parallel = self.quant_method.apply(self.layer,
                                                       input_parallel,