Update torch-npu version to 2.7.1 (#3896)

### What this PR does / why we need it?
Upgrade torch-npu to the official release version 2.7.1


- vLLM version: v0.11.0
- vLLM main: 83f478bb19
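
For reference, a minimal sketch of how the upgraded dependency can be verified after installation. This snippet is illustrative only and not part of the PR; it assumes a standard pip environment where the distribution name is `torch-npu` (the import name is `torch_npu`):

```python
# Illustrative check only (not part of this PR): confirm the installed
# torch-npu distribution matches the pinned 2.7.1 release.
from importlib.metadata import version

assert version("torch-npu").startswith("2.7.1"), (
    f"expected torch-npu 2.7.1, found {version('torch-npu')}"
)
```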

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan (committed by GitHub)
Date: 2025-10-31 17:16:31 +08:00
Parent: 5f6d1b3323
Commit: fcc9a0eaeb
15 changed files with 83 additions and 168 deletions


@@ -11,8 +11,7 @@ from vllm.forward_context import (BatchDescriptor, get_forward_context,
                                   set_forward_context)
 
 import vllm_ascend.envs as envs_ascend
-from vllm_ascend.utils import (enable_sp, has_layer_idx, is_moe_model,
-                               version_check)
+from vllm_ascend.utils import enable_sp, has_layer_idx, is_moe_model
 
 if TYPE_CHECKING:
     from vllm_ascend.ops.weight_prefetch import WeightPrefetchMethod
@@ -163,9 +162,7 @@ def set_ascend_forward_context(
         # this optim now just support dense models due to the specific operators used.
         # Once the necessary conditions are met, support for MOE models will also be added.
         from vllm_ascend.quantization.quant_config import AscendQuantConfig
-        model_type_scope = ["llama", "qwen2", "qwen3"]
-        if version_check():
-            model_type_scope.append("qwen3_moe")
+        model_type_scope = ["llama", "qwen2", "qwen3", "qwen3_moe"]
         addrmsnorm_quant_fusion_enabled = isinstance(vllm_config.quant_config, AscendQuantConfig) and \
             vllm_config.model_config.hf_config.model_type in model_type_scope and \
             forward_context.layer_idx is not None
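
With torch-npu pinned at the official 2.7.1 release, the runtime gate `version_check()` is no longer needed, so `qwen3_moe` joins the fusion scope unconditionally. The helper's body is not shown in this diff; the sketch below is an assumption of what such a gate typically looks like (comparing the installed `torch_npu.__version__` against a minimum version), not the original implementation from `vllm_ascend.utils`:

```python
# Hypothetical reconstruction of a version gate like the removed
# version_check(); the real helper lived in vllm_ascend.utils and its
# body is not part of this excerpt.
from packaging.version import Version


def version_check(min_version: str = "2.7.1") -> bool:
    """Return True if the installed torch_npu is at least `min_version`."""
    try:
        import torch_npu  # only importable in an Ascend NPU environment
    except ImportError:
        return False
    # Strip any local build suffix (e.g. "+git...") before comparing.
    return Version(torch_npu.__version__.split("+")[0]) >= Version(min_version)
```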