[CI] Upgrade transformers version (#6307)
Upgrade transformers to >=4.56.4
- vLLM version: v0.14.1
- vLLM main:
dc917cceb8
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -16,7 +16,6 @@
|
||||
# This file is a part of the vllm-ascend project.
|
||||
# Adapted from vllm/tests/basic_correctness/test_basic_correctness.py
|
||||
#
|
||||
from modelscope import snapshot_download # type: ignore
|
||||
import pytest
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
@@ -28,7 +27,7 @@ def test_qwen2_5_w8a8_external_quantized_tp2():
|
||||
]
|
||||
max_tokens = 5
|
||||
with VllmRunner(
|
||||
snapshot_download("neuralmagic/Qwen2.5-3B-quantized.w8a8"),
|
||||
"neuralmagic/Qwen2.5-3B-quantized.w8a8",
|
||||
tensor_parallel_size=2,
|
||||
cudagraph_capture_sizes=[1, 2, 4, 8],
|
||||
max_model_len=4096,
|
||||
@@ -52,8 +51,7 @@ def test_qwen3_moe_w8a8_dynamic_llm_compressor():
|
||||
]
|
||||
max_tokens = 5
|
||||
with VllmRunner(
|
||||
snapshot_download(
|
||||
"vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w8a8"),
|
||||
"vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w8a8",
|
||||
tensor_parallel_size=2,
|
||||
max_model_len=4096,
|
||||
gpu_memory_utilization=0.8,
|
||||
|
||||
Reference in New Issue
Block a user