From f1769586d651c701bc5f5b6f3a39d5b0f478eb02 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Fri, 10 Jan 2025 20:37:34 -0800 Subject: [PATCH] Update threshold in test_nightly_gsm8k_eval.py (#2836) --- test/srt/test_nightly_gsm8k_eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/srt/test_nightly_gsm8k_eval.py b/test/srt/test_nightly_gsm8k_eval.py index 7820f6825..2e379c111 100644 --- a/test/srt/test_nightly_gsm8k_eval.py +++ b/test/srt/test_nightly_gsm8k_eval.py @@ -26,8 +26,8 @@ MODEL_SCORE_THRESHOLDS = { "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85, "google/gemma-2-27b-it": 0.92, "meta-llama/Llama-3.1-70B-Instruct": 0.95, - "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64, - "Qwen/Qwen2-57B-A14B-Instruct": 0.88, + "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63, + "Qwen/Qwen2-57B-A14B-Instruct": 0.87, "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83, "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54, "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,