upgrade torch npu version (#4433)
The vLLM graph feature now relies on torch >= 2.8. To make graph mode work, we need to upgrade the torch version as well. For long-term support, upgrading torch to a newer version is also worthwhile. Related vLLM change: https://github.com/vllm-project/vllm/pull/25110 - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
@@ -18,7 +18,6 @@ import os
|
||||
|
||||
import vllm_ascend.patch.platform.patch_config # noqa
|
||||
import vllm_ascend.patch.platform.patch_distributed # noqa
|
||||
import vllm_ascend.patch.platform.patch_dynamo_vllm_backend # noqa
|
||||
import vllm_ascend.patch.platform.patch_mamba_config # noqa
|
||||
import vllm_ascend.patch.platform.patch_sched_yield # noqa
|
||||
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
# mypy: ignore-errors
|
||||
from typing import Any, Dict
|
||||
|
||||
import torch.fx as fx
|
||||
from vllm.compilation.backends import VllmBackend
|
||||
from vllm.compilation.caching import VllmSerializableFunction
|
||||
|
||||
# Keep a reference to the unpatched ``VllmBackend.__call__`` so the
# adapter below can delegate to it after the monkeypatch is installed.
_original_vllmbackend_call = VllmBackend.__call__


def __patch_call__(self, graph: fx.GraphModule, example_inputs,
                   options: Dict[str, Any]) -> VllmSerializableFunction:
    """Signature adapter for ``VllmBackend.__call__``.

    Accepts the newer calling convention that passes an extra ``options``
    mapping, but forwards only the arguments the original implementation
    understands.

    NOTE(review): ``options`` is intentionally discarded — the wrapped
    ``__call__`` does not accept it.
    """
    return _original_vllmbackend_call(self, graph, example_inputs)


# Install the adapter in place of the original method.
VllmBackend.__call__ = __patch_call__
|
||||
Reference in New Issue
Block a user