From d40ee62b5d1af63ee4748d70f4c290cf78e8da83 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 12 Mar 2025 15:36:13 -0700 Subject: [PATCH] Update nightly tests (#4352) --- python/sglang/srt/model_executor/model_runner.py | 15 +++++++-------- test/srt/test_nightly_gsm8k_eval.py | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 58ae425b1..cb2069dda 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import ( from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader import get_model +from sglang.srt.model_loader.loader import ( + DefaultModelLoader, + device_loading_context, + get_model_loader, +) +from sglang.srt.model_loader.utils import set_default_torch_dtype from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo from sglang.srt.server_args import ServerArgs @@ -409,13 +415,6 @@ class ModelRunner: self, model_path: str, load_format: str ) -> tuple[bool, str]: """Update engine weights in-place from the disk.""" - from sglang.srt.model_loader.loader import ( - DefaultModelLoader, - device_loading_context, - get_model_loader, - ) - from sglang.srt.model_loader.utils import set_default_torch_dtype - logger.info( f"Update engine weights online from disk begin. 
" f"avail mem={get_available_gpu_memory(self.device, self.gpu_id):.2f} GB" @@ -425,7 +424,7 @@ class ModelRunner: self.model_config.model_path = model_path load_config = LoadConfig(load_format=load_format) - # Only support vllm DefaultModelLoader for now + # Only support the DefaultModelLoader for now loader = get_model_loader(load_config) if not isinstance(loader, DefaultModelLoader): message = f"Failed to get model loader: {loader}." diff --git a/test/srt/test_nightly_gsm8k_eval.py b/test/srt/test_nightly_gsm8k_eval.py index a3ed74348..a8d750029 100644 --- a/test/srt/test_nightly_gsm8k_eval.py +++ b/test/srt/test_nightly_gsm8k_eval.py @@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = { "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85, "google/gemma-2-27b-it": 0.92, "meta-llama/Llama-3.1-70B-Instruct": 0.95, - "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63, + "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64, "Qwen/Qwen2-57B-A14B-Instruct": 0.86, "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83, "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54, "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84, "neuralmagic/gemma-2-2b-it-FP8": 0.60, "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94, - "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.62, + "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.65, "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94, "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82, "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.84,