diff --git a/vllm_ascend/ops/linear_op.py b/vllm_ascend/ops/linear_op.py index f6feadd..819af72 100644 --- a/vllm_ascend/ops/linear_op.py +++ b/vllm_ascend/ops/linear_op.py @@ -390,7 +390,9 @@ class SequenceRowParallelOp(CustomRowParallelOp): bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias if self.tp_size == 1 or not self.reduce_results: - output = self.quant_method.apply(self, input_parallel, bias=bias_) + output = self.quant_method.apply(self.layer, + input_parallel, + bias=bias_) else: output_parallel = self.quant_method.apply(self.layer, input_parallel,