[cherry-pick]Upgrade CANN to 8.3.rc1 (#3945) (#3962)

This PR upgrades CANN from 8.2rc1 to 8.3rc1 and removes the CANN version check logic. TODO: we noticed that UT runs fail with the CANN 8.3 image, so the base image for UT is still 8.2. We'll fix it later. - vLLM version: v0.11.0 - vLLM main: 83f478bb19 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-11-06 09:05:08 +08:00
parent 66b67f9cf2
commit 7ee0b0b5d8
36 changed files with 104 additions and 192 deletions
--- a/vllm_ascend/ops/linear_op.py
+++ b/vllm_ascend/ops/linear_op.py
@@ -411,9 +411,8 @@ class SequenceRowParallelOp(CustomRowParallelOp):
                                                   quant_per_tensor)

        # For unquant
-        if mmrs_fusion and isinstance(
-                self.layer.quant_method, UnquantizedLinearMethod
-        ) and torch.version.cann.startswith("8.3"):
+        if mmrs_fusion and isinstance(self.layer.quant_method,
+                                      UnquantizedLinearMethod):
            output = torch_npu.npu_mm_reduce_scatter_base(
                x,
                self.layer.weight.t(),
@@ -429,8 +428,7 @@ class SequenceRowParallelOp(CustomRowParallelOp):
        elif mmrs_fusion and (
                isinstance(self.layer.quant_method, AscendLinearMethod)
                and isinstance(self.layer.quant_method.quant_method,
-                               AscendW8A8LinearMethod)
-        ) and torch.version.cann.startswith("8.3"):
+                               AscendW8A8LinearMethod)):
            if x.dtype != torch.int8:
                x_quant = quant_per_tensor(
                    x, self.layer.aclnn_input_scale_reciprocal,