diff --git a/vllm_ascend/ops/linear_op.py b/vllm_ascend/ops/linear_op.py index 57044f5..f6feadd 100644 --- a/vllm_ascend/ops/linear_op.py +++ b/vllm_ascend/ops/linear_op.py @@ -299,6 +299,7 @@ class OProjRowParallelOp(CustomRowParallelOp): # otp-specific: Combine partial results across devices output = self.comm_group.reduce_scatter(output_parallel, dim=0) + output = output.view(input_.shape[0], self.layer.output_size) # Handle bias return based on configuration output_bias = self.bias if self.skip_bias_add else None