Update thresholds in test_nightly_gsm8k_eval.py (#3176)
This commit is contained in:
@@ -27,7 +27,7 @@ MODEL_SCORE_THRESHOLDS = {
|
||||
"google/gemma-2-27b-it": 0.92,
|
||||
"meta-llama/Llama-3.1-70B-Instruct": 0.95,
|
||||
"mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63,
|
||||
- "Qwen/Qwen2-57B-A14B-Instruct": 0.87,
|
||||
+ "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
|
||||
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
|
||||
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
|
||||
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
|
||||
|
||||
Reference in New Issue
Block a user