Update torch-npu version to 2.7.1 (#3896)

### What this PR does / why we need it?
Upgrade torch-npu to the official release version 2.7.1


- vLLM version: v0.11.0
- vLLM main:
83f478bb19

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-10-31 17:16:31 +08:00
committed by GitHub
parent 5f6d1b3323
commit fcc9a0eaeb
15 changed files with 83 additions and 168 deletions

View File

@@ -7,7 +7,6 @@ from vllm.forward_context import get_forward_context
from vllm_ascend.ascend_config import WeightPrefetchConfig
from vllm_ascend.ops.linear import (AscendQKVParallelLinear,
AscendRowParallelLinear)
from vllm_ascend.utils import version_check
SUPPORTED_MODULES = ["attn", "mlp", "moe"]
MOE_PREFETCH_TOKEN_THRESHOLD = 96
@@ -83,8 +82,7 @@ class WeightPrefetchMethod:
if not self.moe.is_active_this_forward:
return
forward_context = get_forward_context()
if not version_check():
forward_context.layer_idx += 1
# layer_idx is subtracted by 1 because layer_idx was incremented by 1 at layernorm.
weight = forward_context.model_instance.model.layers[
forward_context.layer_idx - 1].mlp.experts.w13_weight
weight_size = weight.data.element_size() * weight.data.numel(