From b5dd5e87416acc05576e286b826b62e97264a0fb Mon Sep 17 00:00:00 2001
From: Peng Zhang
Date: Mon, 14 Jul 2025 07:11:49 +0800
Subject: [PATCH] chore: remove unnecessary limits on quantization methods in test script (#7997)

---
 test/srt/test_vllm_dependency.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/test/srt/test_vllm_dependency.py b/test/srt/test_vllm_dependency.py
index 7e6278a20..cd3e13167 100644
--- a/test/srt/test_vllm_dependency.py
+++ b/test/srt/test_vllm_dependency.py
@@ -42,10 +42,6 @@ def popen_launch_server_wrapper(base_url, model, is_fp8, is_tp2):
         other_args.extend(["--tp", "2"])
     if "DeepSeek" in model:
         other_args.extend(["--mem-frac", "0.85"])
-    if "AWQ" in model:
-        other_args.extend(["--quantization", "awq"])
-    elif "GPTQ" in model:
-        other_args.extend(["--quantization", "gptq"])
 
     process = popen_launch_server(
         model,