upgrade torch npu version (#4433)
The vLLM graph feature now relies on torch >= 2.8. To make graph mode work, we need to upgrade the torch version as well. For long-term support, upgrading torch to a newer version is also worthwhile. Related vLLM change: https://github.com/vllm-project/vllm/pull/25110 - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
@@ -18,7 +18,6 @@ import os
|
||||
|
||||
import vllm_ascend.patch.platform.patch_config # noqa
|
||||
import vllm_ascend.patch.platform.patch_distributed # noqa
|
||||
import vllm_ascend.patch.platform.patch_dynamo_vllm_backend # noqa
|
||||
import vllm_ascend.patch.platform.patch_mamba_config # noqa
|
||||
import vllm_ascend.patch.platform.patch_sched_yield # noqa
|
||||
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
# mypy: ignore-errors
|
||||
from typing import Any, Dict
|
||||
|
||||
import torch.fx as fx
|
||||
from vllm.compilation.backends import VllmBackend
|
||||
from vllm.compilation.caching import VllmSerializableFunction
|
||||
|
||||
# Keep a reference to the unpatched ``VllmBackend.__call__`` so the
# adapter below can delegate to it after the monkeypatch is installed.
_original_vllmbackend_call = VllmBackend.__call__


def __patch_call__(self, graph: fx.GraphModule, example_inputs,
                   options: Dict[str, Any]) -> VllmSerializableFunction:
    """Signature adapter for ``VllmBackend.__call__``.

    Accepts the newer calling convention that passes an extra ``options``
    mapping, but forwards only the arguments the original implementation
    understands.

    NOTE(review): ``options`` is intentionally discarded — the wrapped
    ``__call__`` does not accept it.
    """
    return _original_vllmbackend_call(self, graph, example_inputs)


# Install the adapter in place of the original method.
VllmBackend.__call__ = __patch_call__
|
||||
Reference in New Issue
Block a user