diff --git a/tests/singlecard/test_offline_inference.py b/tests/singlecard/test_offline_inference.py index 5b58df7..de69612 100644 --- a/tests/singlecard/test_offline_inference.py +++ b/tests/singlecard/test_offline_inference.py @@ -39,7 +39,7 @@ MODELS = [ MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"] QUANTIZATION_MODELS = [ - "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8-new", + "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8", ] os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"