From eb8f02dd87acd8689c41d15a7c0f11f5eff914d0 Mon Sep 17 00:00:00 2001
From: Sai Enduri
Date: Mon, 26 May 2025 11:44:13 -0700
Subject: [PATCH] Update nightly thresholds and dependencies. (#6635)

---
 scripts/amd_ci_install_dependency.sh    |  2 ++
 test/srt/test_nightly_gsm8k_eval_amd.py | 16 ++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/scripts/amd_ci_install_dependency.sh b/scripts/amd_ci_install_dependency.sh
index eedbed020..00245f4d3 100755
--- a/scripts/amd_ci_install_dependency.sh
+++ b/scripts/amd_ci_install_dependency.sh
@@ -13,3 +13,5 @@ docker exec -w /human-eval ci_sglang pip install -e .
 docker exec -w / ci_sglang mkdir -p /dummy-grok
 mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
 docker cp ./dummy-grok ci_sglang:/
+
+docker exec ci_sglang pip install huggingface_hub[hf_xet]
diff --git a/test/srt/test_nightly_gsm8k_eval_amd.py b/test/srt/test_nightly_gsm8k_eval_amd.py
index 3d70df746..5d800e884 100644
--- a/test/srt/test_nightly_gsm8k_eval_amd.py
+++ b/test/srt/test_nightly_gsm8k_eval_amd.py
@@ -21,24 +21,24 @@ from sglang.test.test_utils import (
 MODEL_SCORE_THRESHOLDS = {
     "meta-llama/Llama-3.1-8B-Instruct": 0.82,
-    "mistralai/Mistral-7B-Instruct-v0.3": 0.56,
+    "mistralai/Mistral-7B-Instruct-v0.3": 0.58,
     "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
     "meta-llama/Llama-3.1-70B-Instruct": 0.95,
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.62,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
     "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
-    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.81,
+    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.82,
     "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
-    "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.93,
-    "neuralmagic/Qwen2-72B-Instruct-FP8": 0.93,
-    "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
+    "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
+    "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
+    "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.86,
+    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.61,
+    "google/gemma-2-27b-it": 0.91,
 }
 
 # Models currently failing on AMD MI300x.
 failing_models = {
-    "google/gemma-2-27b-it",
     "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8",
     "neuralmagic/gemma-2-2b-it-FP8",
-    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8",
 }