Update nightly tests (#4352)

2025-03-12 15:36:13 -07:00
parent 91b19949d7
commit d40ee62b5d
2 changed files with 9 additions and 10 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader import get_model
+from sglang.srt.model_loader.loader import (
+    DefaultModelLoader,
+    device_loading_context,
+    get_model_loader,
+)
+from sglang.srt.model_loader.utils import set_default_torch_dtype
 from sglang.srt.model_loader.weight_utils import default_weight_loader
 from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
 from sglang.srt.server_args import ServerArgs
@@ -409,13 +415,6 @@ class ModelRunner:
        self, model_path: str, load_format: str
    ) -> tuple[bool, str]:
        """Update engine weights in-place from the disk."""
-        from sglang.srt.model_loader.loader import (
-            DefaultModelLoader,
-            device_loading_context,
-            get_model_loader,
-        )
-        from sglang.srt.model_loader.utils import set_default_torch_dtype
-
        logger.info(
            f"Update engine weights online from disk begin. "
            f"avail mem={get_available_gpu_memory(self.device, self.gpu_id):.2f} GB"
@@ -425,7 +424,7 @@ class ModelRunner:
        self.model_config.model_path = model_path
        load_config = LoadConfig(load_format=load_format)

-        # Only support vllm DefaultModelLoader for now
+        # Only support the DefaultModelLoader for now
        loader = get_model_loader(load_config)
        if not isinstance(loader, DefaultModelLoader):
            message = f"Failed to get model loader: {loader}."
--- a/test/srt/test_nightly_gsm8k_eval.py
+++ b/test/srt/test_nightly_gsm8k_eval.py
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
    "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
    "google/gemma-2-27b-it": 0.92,
    "meta-llama/Llama-3.1-70B-Instruct": 0.95,
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
    "Qwen/Qwen2-57B-A14B-Instruct": 0.86,
    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
    "neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
    "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
    "neuralmagic/gemma-2-2b-it-FP8": 0.60,
    "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
-    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.62,
+    "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.65,
    "neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
    "neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
    "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.84,