Update nightly tests (#4352)
This commit is contained in:
@@ -56,6 +56,12 @@ from sglang.srt.mem_cache.memory_pool import (
|
|||||||
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
||||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
||||||
from sglang.srt.model_loader import get_model
|
from sglang.srt.model_loader import get_model
|
||||||
|
from sglang.srt.model_loader.loader import (
|
||||||
|
DefaultModelLoader,
|
||||||
|
device_loading_context,
|
||||||
|
get_model_loader,
|
||||||
|
)
|
||||||
|
from sglang.srt.model_loader.utils import set_default_torch_dtype
|
||||||
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
||||||
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
|
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
@@ -409,13 +415,6 @@ class ModelRunner:
|
|||||||
self, model_path: str, load_format: str
|
self, model_path: str, load_format: str
|
||||||
) -> tuple[bool, str]:
|
) -> tuple[bool, str]:
|
||||||
"""Update engine weights in-place from the disk."""
|
"""Update engine weights in-place from the disk."""
|
||||||
from sglang.srt.model_loader.loader import (
|
|
||||||
DefaultModelLoader,
|
|
||||||
device_loading_context,
|
|
||||||
get_model_loader,
|
|
||||||
)
|
|
||||||
from sglang.srt.model_loader.utils import set_default_torch_dtype
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Update engine weights online from disk begin. "
|
f"Update engine weights online from disk begin. "
|
||||||
f"avail mem={get_available_gpu_memory(self.device, self.gpu_id):.2f} GB"
|
f"avail mem={get_available_gpu_memory(self.device, self.gpu_id):.2f} GB"
|
||||||
@@ -425,7 +424,7 @@ class ModelRunner:
|
|||||||
self.model_config.model_path = model_path
|
self.model_config.model_path = model_path
|
||||||
load_config = LoadConfig(load_format=load_format)
|
load_config = LoadConfig(load_format=load_format)
|
||||||
|
|
||||||
# Only support vllm DefaultModelLoader for now
|
# Only support the DefaultModelLoader for now
|
||||||
loader = get_model_loader(load_config)
|
loader = get_model_loader(load_config)
|
||||||
if not isinstance(loader, DefaultModelLoader):
|
if not isinstance(loader, DefaultModelLoader):
|
||||||
message = f"Failed to get model loader: {loader}."
|
message = f"Failed to get model loader: {loader}."
|
||||||
|
|||||||
@@ -26,14 +26,14 @@ MODEL_SCORE_THRESHOLDS = {
|
|||||||
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
|
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 0.85,
|
||||||
"google/gemma-2-27b-it": 0.92,
|
"google/gemma-2-27b-it": 0.92,
|
||||||
"meta-llama/Llama-3.1-70B-Instruct": 0.95,
|
"meta-llama/Llama-3.1-70B-Instruct": 0.95,
|
||||||
"mistralai/Mixtral-8x7B-Instruct-v0.1": 0.63,
|
"mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
|
||||||
"Qwen/Qwen2-57B-A14B-Instruct": 0.86,
|
"Qwen/Qwen2-57B-A14B-Instruct": 0.86,
|
||||||
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
|
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
|
||||||
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
|
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
|
||||||
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
|
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8": 0.84,
|
||||||
"neuralmagic/gemma-2-2b-it-FP8": 0.60,
|
"neuralmagic/gemma-2-2b-it-FP8": 0.60,
|
||||||
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
|
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
|
||||||
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.62,
|
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8": 0.65,
|
||||||
"neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
|
"neuralmagic/Qwen2-72B-Instruct-FP8": 0.94,
|
||||||
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
|
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8": 0.82,
|
||||||
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.84,
|
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.84,
|
||||||
|
|||||||
Reference in New Issue
Block a user