[CI] Upgrade transformers version (#6307)
Upgrade transformers to >=4.56.4
- vLLM version: v0.14.1
- vLLM main:
dc917cceb8
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -16,7 +16,6 @@
|
||||
# This file is a part of the vllm-ascend project.
|
||||
# Adapted from vllm/tests/basic_correctness/test_basic_correctness.py
|
||||
#
|
||||
from modelscope import snapshot_download # type: ignore
|
||||
import pytest
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
@@ -28,7 +27,7 @@ def test_qwen2_5_w8a8_external_quantized_tp2():
|
||||
]
|
||||
max_tokens = 5
|
||||
with VllmRunner(
|
||||
snapshot_download("neuralmagic/Qwen2.5-3B-quantized.w8a8"),
|
||||
"neuralmagic/Qwen2.5-3B-quantized.w8a8",
|
||||
tensor_parallel_size=2,
|
||||
cudagraph_capture_sizes=[1, 2, 4, 8],
|
||||
max_model_len=4096,
|
||||
@@ -52,8 +51,7 @@ def test_qwen3_moe_w8a8_dynamic_llm_compressor():
|
||||
]
|
||||
max_tokens = 5
|
||||
with VllmRunner(
|
||||
snapshot_download(
|
||||
"vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w8a8"),
|
||||
"vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w8a8",
|
||||
tensor_parallel_size=2,
|
||||
max_model_len=4096,
|
||||
gpu_memory_utilization=0.8,
|
||||
|
||||
Reference in New Issue
Block a user