From de071366cddf260830a8871179c5db3752ec9946 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 23 Apr 2025 05:31:17 -0700
Subject: [PATCH] tune the threshold of gemma-2-27b-it in test_nightly_gsm8k_eval.py (#5677)

---
 test/srt/test_nightly_gsm8k_eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/srt/test_nightly_gsm8k_eval.py b/test/srt/test_nightly_gsm8k_eval.py
index 5250159bc..7232a1274 100644
--- a/test/srt/test_nightly_gsm8k_eval.py
+++ b/test/srt/test_nightly_gsm8k_eval.py
@@ -24,7 +24,7 @@ MODEL_SCORE_THRESHOLDS = {
     "meta-llama/Llama-3.1-8B-Instruct": 0.82,
     "mistralai/Mistral-7B-Instruct-v0.3": 0.58,
     "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
-    "google/gemma-2-27b-it": 0.92,
+    "google/gemma-2-27b-it": 0.91,
     "meta-llama/Llama-3.1-70B-Instruct": 0.95,
     "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
     "Qwen/Qwen2-57B-A14B-Instruct": 0.86,