diff --git a/scripts/amd_ci_start_container.sh b/scripts/amd_ci_start_container.sh index bf0f891ea..bf3d52890 100755 --- a/scripts/amd_ci_start_container.sh +++ b/scripts/amd_ci_start_container.sh @@ -9,7 +9,7 @@ else fi # Pull the image -IMAGE="ghcr.io/saienduri/sglang-aiter-backend-v0.1.2:518" +IMAGE="lmsysorg/sglang:v0.4.6.post5-rocm630" echo "Pulling Docker image: $IMAGE" docker pull "$IMAGE" diff --git a/test/srt/test_nightly_gsm8k_eval_amd.py b/test/srt/test_nightly_gsm8k_eval_amd.py index 5d800e884..e49cd107b 100644 --- a/test/srt/test_nightly_gsm8k_eval_amd.py +++ b/test/srt/test_nightly_gsm8k_eval_amd.py @@ -26,18 +26,17 @@ MODEL_SCORE_THRESHOLDS = { "meta-llama/Llama-3.1-70B-Instruct": 0.95, "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64, "Qwen/Qwen2-57B-A14B-Instruct": 0.86, - "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.82, + "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83, "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54, "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94, "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94, "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.86, - "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.61, + "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.65, "google/gemma-2-27b-it": 0.91, + "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84, } -# Models currently failing on AMD MI300x. failing_models = { - "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8", "neuralmagic/gemma-2-2b-it-FP8", } @@ -61,6 +60,16 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = remove_failing_models( DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 ) +NO_MOE_PADDING_MODELS = {"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"} +DISABLE_HF_XET_MODELS = { + "Qwen/Qwen2-57B-A14B-Instruct", + "neuralmagic/Qwen2-57B-A14B-Instruct-FP8", +} +TRITON_MOE_MODELS = { + "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8", + "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8", +} + def parse_models(model_string): return [model.strip() for model in model_string.split(",") if model.strip()] @@ -156,6 +165,16 @@ class TestNightlyGsm8KEval(unittest.TestCase): for model_group, is_fp8, is_tp2 in self.model_groups: for model in model_group: with self.subTest(model=model): + os.environ["SGLANG_MOE_PADDING"] = ( + "0" if model in NO_MOE_PADDING_MODELS else "1" + ) + os.environ["HF_HUB_DISABLE_XET"] = ( + "1" if model in DISABLE_HF_XET_MODELS else "0" + ) + os.environ["SGLANG_AITER_MOE"] = ( + "0" if model in TRITON_MOE_MODELS else "1" + ) + process = popen_launch_server_wrapper(self.base_url, model, is_tp2) args = SimpleNamespace(