From 0c9c6c75a80903a6415ad44b89e1b4be65ef77d5 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Mon, 30 Jun 2025 06:39:38 +0800 Subject: [PATCH] Move files related to EPLB (#7580) --- python/sglang/srt/eplb/__init__.py | 0 .../eplb_algorithms/__init__.py | 2 +- .../eplb_algorithms/deepseek.py | 0 .../eplb_algorithms/deepseek_vec.py | 0 .../srt/{managers => eplb}/eplb_manager.py | 6 ++--- .../srt/{ => eplb}/eplb_simulator/__init__.py | 0 .../srt/{ => eplb}/eplb_simulator/reader.py | 2 +- .../{managers => eplb}/expert_distribution.py | 2 +- .../srt/{managers => eplb}/expert_location.py | 2 +- .../expert_location_dispatch.py | 2 +- .../expert_location_updater.py | 2 +- python/sglang/srt/layers/moe/ep_moe/layer.py | 4 +-- .../srt/layers/moe/ep_moe/token_dispatcher.py | 4 +-- python/sglang/srt/layers/moe/topk.py | 6 ++--- python/sglang/srt/managers/scheduler.py | 4 +-- .../sglang/srt/model_executor/model_runner.py | 26 +++++++++---------- python/sglang/srt/models/deepseek_nextn.py | 4 +-- python/sglang/srt/models/deepseek_v2.py | 8 +++--- python/sglang/srt/models/hunyuan.py | 2 +- python/sglang/srt/models/qwen2_moe.py | 10 +++---- python/sglang/srt/models/qwen3_moe.py | 8 +++--- test/srt/test_expert_location_updater.py | 2 +- 22 files changed, 42 insertions(+), 54 deletions(-) create mode 100644 python/sglang/srt/eplb/__init__.py rename python/sglang/srt/{managers => eplb}/eplb_algorithms/__init__.py (96%) rename python/sglang/srt/{managers => eplb}/eplb_algorithms/deepseek.py (100%) rename python/sglang/srt/{managers => eplb}/eplb_algorithms/deepseek_vec.py (100%) rename python/sglang/srt/{managers => eplb}/eplb_manager.py (94%) rename python/sglang/srt/{ => eplb}/eplb_simulator/__init__.py (100%) rename python/sglang/srt/{ => eplb}/eplb_simulator/reader.py (97%) rename python/sglang/srt/{managers => eplb}/expert_distribution.py (99%) rename python/sglang/srt/{managers => eplb}/expert_location.py (99%) rename python/sglang/srt/{managers => eplb}/expert_location_dispatch.py (97%) rename python/sglang/srt/{model_executor => eplb}/expert_location_updater.py (99%) diff --git a/python/sglang/srt/eplb/__init__.py b/python/sglang/srt/eplb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/sglang/srt/managers/eplb_algorithms/__init__.py b/python/sglang/srt/eplb/eplb_algorithms/__init__.py similarity index 96% rename from python/sglang/srt/managers/eplb_algorithms/__init__.py rename to python/sglang/srt/eplb/eplb_algorithms/__init__.py index 7a970c320..e2a267810 100644 --- a/python/sglang/srt/managers/eplb_algorithms/__init__.py +++ b/python/sglang/srt/eplb/eplb_algorithms/__init__.py @@ -3,7 +3,7 @@ from typing import Optional import torch -from sglang.srt.managers.eplb_algorithms import deepseek, deepseek_vec +from sglang.srt.eplb.eplb_algorithms import deepseek, deepseek_vec class EplbAlgorithm(Enum): diff --git a/python/sglang/srt/managers/eplb_algorithms/deepseek.py b/python/sglang/srt/eplb/eplb_algorithms/deepseek.py similarity index 100% rename from python/sglang/srt/managers/eplb_algorithms/deepseek.py rename to python/sglang/srt/eplb/eplb_algorithms/deepseek.py diff --git a/python/sglang/srt/managers/eplb_algorithms/deepseek_vec.py b/python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py similarity index 100% rename from python/sglang/srt/managers/eplb_algorithms/deepseek_vec.py rename to python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py diff --git a/python/sglang/srt/managers/eplb_manager.py b/python/sglang/srt/eplb/eplb_manager.py similarity index 94% rename from python/sglang/srt/managers/eplb_manager.py rename to python/sglang/srt/eplb/eplb_manager.py index b74b7f21e..604e2c464 100644 --- a/python/sglang/srt/managers/eplb_manager.py +++ b/python/sglang/srt/eplb/eplb_manager.py @@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, List import torch.cuda -from sglang.srt.managers.expert_distribution import ( - get_global_expert_distribution_recorder, -) -from sglang.srt.managers.expert_location import ExpertLocationMetadata +from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder +from sglang.srt.eplb.expert_location import ExpertLocationMetadata if TYPE_CHECKING: from sglang.srt.model_executor.model_runner import ModelRunner diff --git a/python/sglang/srt/eplb_simulator/__init__.py b/python/sglang/srt/eplb/eplb_simulator/__init__.py similarity index 100% rename from python/sglang/srt/eplb_simulator/__init__.py rename to python/sglang/srt/eplb/eplb_simulator/__init__.py diff --git a/python/sglang/srt/eplb_simulator/reader.py b/python/sglang/srt/eplb/eplb_simulator/reader.py similarity index 97% rename from python/sglang/srt/eplb_simulator/reader.py rename to python/sglang/srt/eplb/eplb_simulator/reader.py index 326542c79..97405c319 100644 --- a/python/sglang/srt/eplb_simulator/reader.py +++ b/python/sglang/srt/eplb/eplb_simulator/reader.py @@ -4,7 +4,7 @@ from pathlib import Path import torch from tqdm import tqdm -from sglang.srt.managers.expert_distribution import ( +from sglang.srt.eplb.expert_distribution import ( _convert_global_physical_count_to_logical_count, ) diff --git a/python/sglang/srt/managers/expert_distribution.py b/python/sglang/srt/eplb/expert_distribution.py similarity index 99% rename from python/sglang/srt/managers/expert_distribution.py rename to python/sglang/srt/eplb/expert_distribution.py index 5cbafff73..83fd42250 100644 --- a/python/sglang/srt/managers/expert_distribution.py +++ b/python/sglang/srt/eplb/expert_distribution.py @@ -24,7 +24,7 @@ import einops import torch import torch.distributed -from sglang.srt.managers.expert_location import ExpertLocationMetadata +from sglang.srt.eplb.expert_location import ExpertLocationMetadata from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.server_args import ServerArgs diff --git a/python/sglang/srt/managers/expert_location.py b/python/sglang/srt/eplb/expert_location.py similarity index 99% rename from python/sglang/srt/managers/expert_location.py rename to python/sglang/srt/eplb/expert_location.py index 13ba9849e..822429dc4 100644 --- a/python/sglang/srt/managers/expert_location.py +++ b/python/sglang/srt/eplb/expert_location.py @@ -23,7 +23,7 @@ import torch.distributed import torch.nn.functional as F from sglang.srt.configs.model_config import ModelConfig -from sglang.srt.managers import eplb_algorithms +from sglang.srt.eplb import eplb_algorithms from sglang.srt.model_loader import get_model_architecture from sglang.srt.server_args import ServerArgs diff --git a/python/sglang/srt/managers/expert_location_dispatch.py b/python/sglang/srt/eplb/expert_location_dispatch.py similarity index 97% rename from python/sglang/srt/managers/expert_location_dispatch.py rename to python/sglang/srt/eplb/expert_location_dispatch.py index 92b9095e3..36224eee7 100644 --- a/python/sglang/srt/managers/expert_location_dispatch.py +++ b/python/sglang/srt/eplb/expert_location_dispatch.py @@ -17,7 +17,7 @@ from typing import Literal, Optional import torch -from sglang.srt.managers.expert_location import get_global_expert_location_metadata +from sglang.srt.eplb.expert_location import get_global_expert_location_metadata from sglang.srt.managers.schedule_batch import global_server_args_dict diff --git a/python/sglang/srt/model_executor/expert_location_updater.py b/python/sglang/srt/eplb/expert_location_updater.py similarity index 99% rename from python/sglang/srt/model_executor/expert_location_updater.py rename to python/sglang/srt/eplb/expert_location_updater.py index 5dce3d9a3..6fdeb0322 100644 --- a/python/sglang/srt/model_executor/expert_location_updater.py +++ b/python/sglang/srt/eplb/expert_location_updater.py @@ -20,7 +20,7 @@ import torch import torch.distributed from torch.distributed import P2POp -from sglang.srt.managers.expert_location import ( +from sglang.srt.eplb.expert_location import ( ExpertLocationMetadata, get_global_expert_location_metadata, ) diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py index df228e528..d5cf3b568 100644 --- a/python/sglang/srt/layers/moe/ep_moe/layer.py +++ b/python/sglang/srt/layers/moe/ep_moe/layer.py @@ -11,6 +11,8 @@ from sglang.srt.distributed import ( get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, ) +from sglang.srt.eplb.expert_location import get_global_expert_location_metadata +from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.layers.moe.ep_moe.kernels import ( ep_gather, ep_scatter, @@ -40,8 +42,6 @@ from sglang.srt.layers.quantization.fp8_kernel import ( sglang_per_token_quant_fp8, ) from sglang.srt.layers.quantization.fp8_utils import normalize_e4m3fn_to_e4m3fnuz -from sglang.srt.managers.expert_location import get_global_expert_location_metadata -from sglang.srt.managers.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import ForwardMode from sglang.srt.utils import ( diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py index 2c2c4d1f5..a3df8432c 100644 --- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py +++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py @@ -1,10 +1,8 @@ import logging from dataclasses import dataclass +from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder from sglang.srt.layers.quantization import deep_gemm_wrapper -from sglang.srt.managers.expert_distribution import ( - get_global_expert_distribution_recorder, -) from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.utils import ( DeepEPMode, diff --git a/python/sglang/srt/layers/moe/topk.py b/python/sglang/srt/layers/moe/topk.py index 348758c57..610931cc8 100644 --- a/python/sglang/srt/layers/moe/topk.py +++ b/python/sglang/srt/layers/moe/topk.py @@ -18,12 +18,12 @@ from typing import Callable, Optional import torch import torch.nn.functional as F -from sglang.srt.managers import expert_location_dispatch -from sglang.srt.managers.expert_distribution import ( +from sglang.srt.eplb import expert_location_dispatch +from sglang.srt.eplb.expert_distribution import ( ExpertDistributionRecorder, get_global_expert_distribution_recorder, ) -from sglang.srt.managers.expert_location_dispatch import ( +from sglang.srt.eplb.expert_location_dispatch import ( ExpertLocationDispatchInfo, topk_ids_logical_to_physical, ) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index b8364632f..e252f908c 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -58,6 +58,7 @@ from sglang.srt.disaggregation.utils import ( prepare_abort, ) from sglang.srt.distributed import get_pp_group, get_world_group +from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder from sglang.srt.hf_transformers_utils import ( get_processor, get_tokenizer, @@ -65,9 +66,6 @@ from sglang.srt.hf_transformers_utils import ( ) from sglang.srt.layers.dp_attention import compute_dp_attention_world_info from sglang.srt.layers.logits_processor import LogitsProcessorOutput -from sglang.srt.managers.expert_distribution import ( - get_global_expert_distribution_recorder, -) from sglang.srt.managers.io_struct import ( AbortReq, CloseSessionReqInput, diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 277cab8df..8b9a367f4 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -39,6 +39,19 @@ from sglang.srt.distributed import ( set_mscclpp_all_reduce, ) from sglang.srt.distributed.parallel_state import monkey_patch_vllm_parallel_state +from sglang.srt.eplb.eplb_manager import EPLBManager +from sglang.srt.eplb.expert_distribution import ( + ExpertDistributionRecorder, + get_global_expert_distribution_recorder, + set_global_expert_distribution_recorder, +) +from sglang.srt.eplb.expert_location import ( + ExpertLocationMetadata, + compute_initial_expert_location_metadata, + get_global_expert_location_metadata, + set_global_expert_location_metadata, +) +from sglang.srt.eplb.expert_location_updater import ExpertLocationUpdater from sglang.srt.layers.attention.tbo_backend import TboAttnBackend from sglang.srt.layers.dp_attention import ( get_attention_tp_group, @@ -54,18 +67,6 @@ from sglang.srt.layers.sampler import Sampler from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model from sglang.srt.layers.utils import is_sm100_supported from sglang.srt.lora.lora_manager import LoRAManager -from sglang.srt.managers.eplb_manager import EPLBManager -from sglang.srt.managers.expert_distribution import ( - ExpertDistributionRecorder, - get_global_expert_distribution_recorder, - set_global_expert_distribution_recorder, -) -from sglang.srt.managers.expert_location import ( - ExpertLocationMetadata, - compute_initial_expert_location_metadata, - get_global_expert_location_metadata, - set_global_expert_location_metadata, -) from sglang.srt.managers.schedule_batch import ( GLOBAL_SERVER_ARGS_KEYS, global_server_args_dict, @@ -84,7 +85,6 @@ from sglang.srt.mem_cache.memory_pool import ( SWAKVPool, ) from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner -from sglang.srt.model_executor.expert_location_updater import ExpertLocationUpdater from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors from sglang.srt.model_loader import get_model from sglang.srt.model_loader.loader import DefaultModelLoader, get_model_loader diff --git a/python/sglang/srt/models/deepseek_nextn.py b/python/sglang/srt/models/deepseek_nextn.py index d83586358..e61dadadc 100644 --- a/python/sglang/srt/models/deepseek_nextn.py +++ b/python/sglang/srt/models/deepseek_nextn.py @@ -21,6 +21,7 @@ from torch import nn from transformers import PretrainedConfig from sglang.srt.distributed import get_tensor_model_parallel_world_size +from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.quantization.base_config import QuantizationConfig @@ -28,9 +29,6 @@ from sglang.srt.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) -from sglang.srt.managers.expert_distribution import ( - get_global_expert_distribution_recorder, -) from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.models.deepseek_v2 import DeepseekV2DecoderLayer, DeepseekV3ForCausalLM diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 79c7066df..f1ab8c3e7 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -32,6 +32,9 @@ from sglang.srt.distributed import ( parallel_state, tensor_model_parallel_all_reduce, ) +from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder +from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation +from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.communicator import ( LayerCommunicator, @@ -77,11 +80,6 @@ from sglang.srt.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) -from sglang.srt.managers.expert_distribution import ( - get_global_expert_distribution_recorder, -) -from sglang.srt.managers.expert_location import ModelConfigForExpertLocation -from sglang.srt.managers.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader.weight_utils import default_weight_loader diff --git a/python/sglang/srt/models/hunyuan.py b/python/sglang/srt/models/hunyuan.py index 00300bed5..41a833f3d 100644 --- a/python/sglang/srt/models/hunyuan.py +++ b/python/sglang/srt/models/hunyuan.py @@ -28,6 +28,7 @@ from sglang.srt.distributed import ( get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce, ) +from sglang.srt.eplb.expert_distribution import ExpertDistributionRecorder from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.linear import ( @@ -48,7 +49,6 @@ from sglang.srt.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) -from sglang.srt.managers.expert_distribution import ExpertDistributionRecorder from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader.weight_utils import ( default_weight_loader, diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index 67e72d465..0968ba0f4 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -31,6 +31,11 @@ from sglang.srt.distributed import ( get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce, ) +from sglang.srt.eplb.expert_distribution import ( + ExpertDistributionRecorder, + get_global_expert_distribution_recorder, +) +from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.communicator import ( LayerCommunicator, @@ -64,11 +69,6 @@ from sglang.srt.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) -from sglang.srt.managers.expert_distribution import ( - ExpertDistributionRecorder, - get_global_expert_distribution_recorder, -) -from sglang.srt.managers.expert_location import ModelConfigForExpertLocation from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors from sglang.srt.model_loader.weight_utils import default_weight_loader diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py index f885500a9..c76326ec0 100644 --- a/python/sglang/srt/models/qwen3_moe.py +++ b/python/sglang/srt/models/qwen3_moe.py @@ -32,6 +32,9 @@ from sglang.srt.distributed import ( tensor_model_parallel_all_gather, tensor_model_parallel_all_reduce, ) +from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder +from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation +from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes from sglang.srt.layers.dp_attention import ( @@ -63,11 +66,6 @@ from sglang.srt.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) -from sglang.srt.managers.expert_distribution import ( - get_global_expert_distribution_recorder, -) -from sglang.srt.managers.expert_location import ModelConfigForExpertLocation -from sglang.srt.managers.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import ( ForwardBatch, diff --git a/test/srt/test_expert_location_updater.py b/test/srt/test_expert_location_updater.py index 2b1249b1b..094540294 100644 --- a/test/srt/test_expert_location_updater.py +++ b/test/srt/test_expert_location_updater.py @@ -9,7 +9,7 @@ import torch.distributed import torch.multiprocessing as mp from torch.multiprocessing import Process -from sglang.srt.model_executor import expert_location_updater +from sglang.srt.eplb import expert_location_updater from sglang.test.test_utils import CustomTestCase, find_available_port from sglang.utils import is_in_ci