[CI] Upgrade transformers version (#6307)
Upgrade transformers to >=4.56.4
- vLLM version: v0.14.1
- vLLM main:
dc917cceb8
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -21,7 +21,6 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from modelscope import snapshot_download # type: ignore
|
||||
from vllm import SamplingParams
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
@@ -66,11 +65,11 @@ def test_aclgraph_mem_use(model: str, max_tokens: int) -> None:
|
||||
sampling_params = SamplingParams(max_tokens=max_tokens,
|
||||
temperature=0.0)
|
||||
if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
|
||||
vllm_model = VllmRunner(snapshot_download(model),
|
||||
vllm_model = VllmRunner(model,
|
||||
max_model_len=1024,
|
||||
quantization="ascend")
|
||||
else:
|
||||
vllm_model = VllmRunner(snapshot_download(model))
|
||||
vllm_model = VllmRunner(model)
|
||||
_ = vllm_model.generate(prompts, sampling_params)
|
||||
|
||||
assert capture_called.value == 1, "capture_model was not called during test"
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
import vllm
|
||||
from modelscope import snapshot_download # type: ignore
|
||||
from vllm.lora.request import LoRARequest
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
@@ -46,7 +45,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
|
||||
|
||||
def test_ilama_lora(ilama_lora_files):
|
||||
with VllmRunner(
|
||||
snapshot_download(MODEL_PATH),
|
||||
MODEL_PATH,
|
||||
enable_lora=True,
|
||||
dtype="half",
|
||||
max_loras=4,
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from modelscope import snapshot_download # type: ignore
|
||||
from vllm import SamplingParams
|
||||
from vllm.assets.audio import AudioAsset
|
||||
|
||||
@@ -46,7 +45,7 @@ def test_minicpm(model) -> None:
|
||||
]
|
||||
max_tokens = 5
|
||||
|
||||
with VllmRunner(snapshot_download(model),
|
||||
with VllmRunner(model,
|
||||
max_model_len=512,
|
||||
gpu_memory_utilization=0.7) as runner:
|
||||
runner.generate_greedy(example_prompts, max_tokens)
|
||||
@@ -61,7 +60,7 @@ def test_whisper(model) -> None:
|
||||
max_tokens=10,
|
||||
stop_token_ids=None)
|
||||
|
||||
with VllmRunner(snapshot_download(model),
|
||||
with VllmRunner(model,
|
||||
max_model_len=448,
|
||||
max_num_seqs=5,
|
||||
dtype="bfloat16",
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
from modelscope import snapshot_download # type: ignore[import-untyped]
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
from tests.e2e.model_utils import check_outputs_equal
|
||||
|
||||
@@ -33,7 +31,7 @@ def test_qwen3_w8a8_quant():
|
||||
)]
|
||||
|
||||
with VllmRunner(
|
||||
snapshot_download("vllm-ascend/Qwen3-0.6B-W8A8"),
|
||||
"vllm-ascend/Qwen3-0.6B-W8A8",
|
||||
max_model_len=8192,
|
||||
gpu_memory_utilization=0.7,
|
||||
cudagraph_capture_sizes=[1, 2, 4, 8],
|
||||
@@ -62,7 +60,7 @@ def test_qwen3_dense_w8a16():
|
||||
)]
|
||||
|
||||
with VllmRunner(
|
||||
snapshot_download("vllm-ascend/Qwen3-0.6B-W8A16"),
|
||||
"vllm-ascend/Qwen3-0.6B-W8A16",
|
||||
max_model_len=8192,
|
||||
enforce_eager=False,
|
||||
gpu_memory_utilization=0.7,
|
||||
|
||||
Reference in New Issue
Block a user