From f8ca66fb4965db751f8263097ef27965ab2e1442 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Mon, 27 Jan 2025 03:02:09 -0800 Subject: [PATCH] Update thresholds in test_nightly_gsm8k_eval.py (#3176) --- test/srt/test_nightly_gsm8k_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/test_nightly_gsm8k_eval.py b/test/srt/test_nightly_gsm8k_eval.py index 06c83048f..6fe361715 100644 --- a/test/srt/test_nightly_gsm8k_eval.py +++ b/test/srt/test_nightly_gsm8k_eval.py @@ -27,7 +27,7 @@ MODEL_SCORE_THRESHOLDS = { "google/gemma-2-27b-it": 0.92, "meta-llama/Llama-3.1-70B-Instruct": 0.95, "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63, - "Qwen/Qwen2-57B-A14B-Instruct": 0.87, + "Qwen/Qwen2-57B-A14B-Instruct": 0.86, "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83, "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54, "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,