Tiny detect slow ranks (#10508)

This commit is contained in:
fzyzcjy
2025-10-02 18:00:33 +08:00
committed by GitHub
parent f35def8652
commit 2ac453b07f
2 changed files with 75 additions and 0 deletions

View File

@@ -31,6 +31,7 @@ import requests
import torch
import torch.distributed as dist
from sglang.srt import slow_rank_detector
from sglang.srt.configs.device_config import DeviceConfig
from sglang.srt.configs.load_config import LoadConfig, LoadFormat
from sglang.srt.configs.model_config import AttentionArch, ModelConfig
@@ -283,6 +284,9 @@ class ModelRunner:
# CPU offload
set_offloader(create_offloader_from_server_args(server_args, dp_rank=dp_rank))
if get_bool_env_var("SGLANG_DETECT_SLOW_RANK"):
slow_rank_detector.execute()
# Update the DeepGEMM configuration
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
deep_gemm_wrapper.update_deep_gemm_config(gpu_id, server_args)