Tiny detect slow ranks (#10508)
This commit is contained in:
@@ -31,6 +31,7 @@ import requests
 import torch
 import torch.distributed as dist
 
+from sglang.srt import slow_rank_detector
 from sglang.srt.configs.device_config import DeviceConfig
 from sglang.srt.configs.load_config import LoadConfig, LoadFormat
 from sglang.srt.configs.model_config import AttentionArch, ModelConfig
@@ -283,6 +284,9 @@ class ModelRunner:
         # CPU offload
         set_offloader(create_offloader_from_server_args(server_args, dp_rank=dp_rank))
 
+        if get_bool_env_var("SGLANG_DETECT_SLOW_RANK"):
+            slow_rank_detector.execute()
+
         # Update deep gemm configure
         if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
             deep_gemm_wrapper.update_deep_gemm_config(gpu_id, server_args)
Reference in New Issue
Block a user