[CI] Upgrade transformers version (#6307)

Upgrade transformers to >=4.56.4

- vLLM version: v0.14.1
- vLLM main:
dc917cceb8

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2026-01-28 14:06:39 +08:00
committed by GitHub
parent c498cea22d
commit f8e76a49fa
14 changed files with 30 additions and 42 deletions

View File

@@ -21,7 +21,6 @@ from unittest.mock import patch
import pytest
import torch
from modelscope import snapshot_download # type: ignore
from vllm import SamplingParams
from tests.e2e.conftest import VllmRunner
@@ -66,11 +65,11 @@ def test_aclgraph_mem_use(model: str, max_tokens: int) -> None:
sampling_params = SamplingParams(max_tokens=max_tokens,
temperature=0.0)
if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
vllm_model = VllmRunner(snapshot_download(model),
vllm_model = VllmRunner(model,
max_model_len=1024,
quantization="ascend")
else:
vllm_model = VllmRunner(snapshot_download(model))
vllm_model = VllmRunner(model)
_ = vllm_model.generate(prompts, sampling_params)
assert capture_called.value == 1, "capture_model was not called during test"

View File

@@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
import vllm
from modelscope import snapshot_download # type: ignore
from vllm.lora.request import LoRARequest
from tests.e2e.conftest import VllmRunner
@@ -46,7 +45,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
def test_ilama_lora(ilama_lora_files):
with VllmRunner(
snapshot_download(MODEL_PATH),
MODEL_PATH,
enable_lora=True,
dtype="half",
max_loras=4,

View File

@@ -20,7 +20,6 @@
import os
import pytest
from modelscope import snapshot_download # type: ignore
from vllm import SamplingParams
from vllm.assets.audio import AudioAsset
@@ -46,7 +45,7 @@ def test_minicpm(model) -> None:
]
max_tokens = 5
with VllmRunner(snapshot_download(model),
with VllmRunner(model,
max_model_len=512,
gpu_memory_utilization=0.7) as runner:
runner.generate_greedy(example_prompts, max_tokens)
@@ -61,7 +60,7 @@ def test_whisper(model) -> None:
max_tokens=10,
stop_token_ids=None)
with VllmRunner(snapshot_download(model),
with VllmRunner(model,
max_model_len=448,
max_num_seqs=5,
dtype="bfloat16",

View File

@@ -15,8 +15,6 @@
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
from modelscope import snapshot_download # type: ignore[import-untyped]
from tests.e2e.conftest import VllmRunner
from tests.e2e.model_utils import check_outputs_equal
@@ -33,7 +31,7 @@ def test_qwen3_w8a8_quant():
)]
with VllmRunner(
snapshot_download("vllm-ascend/Qwen3-0.6B-W8A8"),
"vllm-ascend/Qwen3-0.6B-W8A8",
max_model_len=8192,
gpu_memory_utilization=0.7,
cudagraph_capture_sizes=[1, 2, 4, 8],
@@ -62,7 +60,7 @@ def test_qwen3_dense_w8a16():
)]
with VllmRunner(
snapshot_download("vllm-ascend/Qwen3-0.6B-W8A16"),
"vllm-ascend/Qwen3-0.6B-W8A16",
max_model_len=8192,
enforce_eager=False,
gpu_memory_utilization=0.7,