diff --git a/python/sglang/srt/managers/expert_location.py b/python/sglang/srt/managers/expert_location.py
index 7391dc8c2..97d439558 100644
--- a/python/sglang/srt/managers/expert_location.py
+++ b/python/sglang/srt/managers/expert_location.py
@@ -136,7 +136,7 @@ class ExpertLocationMetadata:
         num_physical_experts = common["num_physical_experts"]
 
         phase = server_args.disaggregation_mode
-        if phase == "null":
+        if phase == "null" or model_config_for_expert_location.num_groups is None:
             phase = "decode"
 
         physical_to_logical_map, logical_to_all_physical_map, expert_count = (
diff --git a/python/sglang/srt/model_executor/expert_location_updater.py b/python/sglang/srt/model_executor/expert_location_updater.py
index ca31ef740..13c4adc8d 100644
--- a/python/sglang/srt/model_executor/expert_location_updater.py
+++ b/python/sglang/srt/model_executor/expert_location_updater.py
@@ -12,6 +12,7 @@
 # limitations under the License.
 # ==============================================================================
 import logging
+from datetime import timedelta
 from typing import Dict, List, Tuple
 
 import torch
@@ -340,7 +341,7 @@ def update_expert_weights_single_layer(
     reqs = torch.distributed.batch_isend_irecv(p2p_ops)
     try:
         for req in reqs:
-            req.wait(timeout=30)
+            req.wait(timeout=timedelta(seconds=30))
     except RuntimeError:
         logger.error(
             f"Context: {rank=} {old_physical_to_logical_map=} {new_physical_to_logical_map=} {num_local_physical_experts=} {num_gpu_per_node=}"
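
Note on the second hunk: `torch.distributed.Work.wait()` takes its `timeout` as a `datetime.timedelta`, not a plain number of seconds, so the previous `timeout=30` would fail at runtime with a TypeError. Below is a minimal sketch of the corrected pattern, not the PR's code: it assumes an already-initialized process group, and the function name `exchange_expert_weights`, the `peer` rank, and the buffer shapes are illustrative.

```python
from datetime import timedelta

import torch
import torch.distributed as dist


def exchange_expert_weights(
    send_buf: torch.Tensor, recv_buf: torch.Tensor, peer: int
) -> None:
    # Batch a send/recv pair, analogous to how the updater shuffles
    # expert weights between ranks.
    ops = [
        dist.P2POp(dist.isend, send_buf, peer),
        dist.P2POp(dist.irecv, recv_buf, peer),
    ]
    reqs = dist.batch_isend_irecv(ops)
    for req in reqs:
        # Work.wait() expects a datetime.timedelta; passing an int such
        # as `timeout=30` raises a TypeError instead of waiting 30s.
        req.wait(timeout=timedelta(seconds=30))
```

The first hunk is independent: it forces the "decode" expert layout whenever the model config reports no expert groups (`num_groups is None`), presumably because the phase-specific layout logic only applies to grouped-expert models.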