upgrade torch npu version (#4433)

The vLLM graph feature now relies on torch >= 2.8. To make graph mode work, we
need to upgrade the torch version as well. For long-term support, upgrading
torch to a newer version is worthwhile in any case.

Related vLLM change: https://github.com/vllm-project/vllm/pull/25110


- vLLM version: v0.11.2
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
wangxiyuan
2025-12-01 19:01:55 +08:00
committed by GitHub
parent f1f6370ed9
commit 0d14f635b4
22 changed files with 63 additions and 76 deletions

View File

@@ -28,7 +28,7 @@ def test_bge_model_correctness():
model_name = snapshot_download("BAAI/bge-m3")
with VllmRunner(
model_name,
task="embed",
runner="pooling",
enforce_eager=True,
) as vllm_runner:
vllm_outputs = vllm_runner.encode(queries)

View File

@@ -28,7 +28,7 @@ def test_embed_models_correctness():
model_name = snapshot_download("Qwen/Qwen3-Embedding-0.6B")
with VllmRunner(
model_name,
task="embed",
runner="pooling",
enforce_eager=False,
) as vllm_runner:
vllm_outputs = vllm_runner.encode(queries)

View File

@@ -34,14 +34,14 @@ def test_aclgrpah_embed_models_correctness(model_name):
with VllmRunner(
model_name,
task="embed",
runner="pooling",
enforce_eager=False,
) as vllm_aclgraph_runner:
vllm_aclgraph_outputs = vllm_aclgraph_runner.encode(queries)
with VllmRunner(
model_name,
task="embed",
runner="pooling",
enforce_eager=True,
) as vllm_runner:
vllm_outputs = vllm_runner.encode(queries)