From a6db88626e137d65b2b452609174a9e7aff2d779 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Fri, 20 Sep 2024 01:57:19 +0800 Subject: [PATCH] minor: add quant eval compared with base (#1475) --- python/sglang/test/test_utils.py | 1 + test/srt/test_nightly_gsm8k_eval.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index f6e5f3ca0..172c2fcc3 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -30,6 +30,7 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Meta-Llama-3.1-8B-Instruct DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Meta-Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct" DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8" DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8" +DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4" def is_in_ci(): diff --git a/test/srt/test_nightly_gsm8k_eval.py b/test/srt/test_nightly_gsm8k_eval.py index 32a80dbd4..8b8e0e16b 100644 --- a/test/srt/test_nightly_gsm8k_eval.py +++ b/test/srt/test_nightly_gsm8k_eval.py @@ -6,6 +6,7 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2, + DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1, DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, @@ -26,6 +27,7 @@ class TestEvalAccuracyLarge(unittest.TestCase): (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True), (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False), (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True), + (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1), False, False), ] cls.base_url = DEFAULT_URL_FOR_TEST @@ -53,6 +55,10 @@ class TestEvalAccuracyLarge(unittest.TestCase): other_args.extend(["--tp", "2"]) if "DeepSeek" in model: other_args.extend(["--mem-frac", "0.85"]) + if "AWQ" in model: + other_args.extend(["--quantization", "awq"]) + elif "GPTQ" in model: + other_args.extend(["--quantization", "gptq"]) self.process = popen_launch_server( model,