upgrade torch npu version (#4433)

vLLM graph feature now rely on torch >=2.8. To make graph mode work, we
need upgrade torch version as well. For long term support, upgrade torch
to a newer one is good to go as well.

Related vLLM change: https://github.com/vllm-project/vllm/pull/25110


- vLLM version: v0.11.2
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
wangxiyuan
2025-12-01 19:01:55 +08:00
committed by GitHub
parent f1f6370ed9
commit 0d14f635b4
22 changed files with 63 additions and 76 deletions

View File

@@ -40,7 +40,7 @@ from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
BatchEncoding, BatchFeature)
from transformers.models.auto.auto_factory import _BaseAutoModelClass
from vllm import LLM, SamplingParams
from vllm.config.model import TaskOption, _get_and_verify_dtype
from vllm.config.model import _get_and_verify_dtype
from vllm.inputs import TextPrompt
from vllm.outputs import RequestOutput
from vllm.platforms import current_platform
@@ -270,7 +270,7 @@ class VllmRunner:
def __init__(
self,
model_name: str,
task: TaskOption = "auto",
runner: str = "auto",
tokenizer_name: Optional[str] = None,
tokenizer_mode: str = "auto",
# Use smaller max model length, otherwise bigger model cannot run due
@@ -288,7 +288,7 @@ class VllmRunner:
) -> None:
self.model = LLM(
model=model_name,
task=task,
runner=runner,
tokenizer=tokenizer_name,
tokenizer_mode=tokenizer_mode,
trust_remote_code=True,