From 0c9c6c75a80903a6415ad44b89e1b4be65ef77d5 Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Mon, 30 Jun 2025 06:39:38 +0800
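Subject: [PATCH] Move files related to EPLB (#7580)

Gather the EPLB-related modules into a new sglang.srt.eplb package:

  * srt/managers/{eplb_algorithms/, eplb_manager.py, expert_distribution.py,
    expert_location.py, expert_location_dispatch.py} -> srt/eplb/
  * srt/model_executor/expert_location_updater.py -> srt/eplb/
  * srt/eplb_simulator/ -> srt/eplb/eplb_simulator/

Import paths in the moved modules, in their callers (layers/moe,
managers/scheduler, model_executor/model_runner, models) and in
test/srt/test_expert_location_updater.py are updated to match. The hunks
in this patch change import statements only.
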
---
 python/sglang/srt/eplb/__init__.py            |  0
 .../eplb_algorithms/__init__.py               |  2 +-
 .../eplb_algorithms/deepseek.py               |  0
 .../eplb_algorithms/deepseek_vec.py           |  0
 .../srt/{managers => eplb}/eplb_manager.py    |  6 ++---
 .../srt/{ => eplb}/eplb_simulator/__init__.py |  0
 .../srt/{ => eplb}/eplb_simulator/reader.py   |  2 +-
 .../{managers => eplb}/expert_distribution.py |  2 +-
 .../srt/{managers => eplb}/expert_location.py |  2 +-
 .../expert_location_dispatch.py               |  2 +-
 .../expert_location_updater.py                |  2 +-
 python/sglang/srt/layers/moe/ep_moe/layer.py  |  4 +--
 .../srt/layers/moe/ep_moe/token_dispatcher.py |  4 +--
 python/sglang/srt/layers/moe/topk.py          |  6 ++---
 python/sglang/srt/managers/scheduler.py       |  4 +--
 .../sglang/srt/model_executor/model_runner.py | 26 +++++++++----------
 python/sglang/srt/models/deepseek_nextn.py    |  4 +--
 python/sglang/srt/models/deepseek_v2.py       |  8 +++---
 python/sglang/srt/models/hunyuan.py           |  2 +-
 python/sglang/srt/models/qwen2_moe.py         | 10 +++----
 python/sglang/srt/models/qwen3_moe.py         |  8 +++---
 test/srt/test_expert_location_updater.py      |  2 +-
 22 files changed, 42 insertions(+), 54 deletions(-)
 create mode 100644 python/sglang/srt/eplb/__init__.py
 rename python/sglang/srt/{managers => eplb}/eplb_algorithms/__init__.py (96%)
 rename python/sglang/srt/{managers => eplb}/eplb_algorithms/deepseek.py (100%)
 rename python/sglang/srt/{managers => eplb}/eplb_algorithms/deepseek_vec.py (100%)
 rename python/sglang/srt/{managers => eplb}/eplb_manager.py (94%)
 rename python/sglang/srt/{ => eplb}/eplb_simulator/__init__.py (100%)
 rename python/sglang/srt/{ => eplb}/eplb_simulator/reader.py (97%)
 rename python/sglang/srt/{managers => eplb}/expert_distribution.py (99%)
 rename python/sglang/srt/{managers => eplb}/expert_location.py (99%)
 rename python/sglang/srt/{managers => eplb}/expert_location_dispatch.py (97%)
 rename python/sglang/srt/{model_executor => eplb}/expert_location_updater.py (99%)

diff --git a/python/sglang/srt/eplb/__init__.py b/python/sglang/srt/eplb/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/sglang/srt/managers/eplb_algorithms/__init__.py b/python/sglang/srt/eplb/eplb_algorithms/__init__.py
similarity index 96%
rename from python/sglang/srt/managers/eplb_algorithms/__init__.py
rename to python/sglang/srt/eplb/eplb_algorithms/__init__.py
index 7a970c320..e2a267810 100644
--- a/python/sglang/srt/managers/eplb_algorithms/__init__.py
+++ b/python/sglang/srt/eplb/eplb_algorithms/__init__.py
@@ -3,7 +3,7 @@ from typing import Optional
 
 import torch
 
-from sglang.srt.managers.eplb_algorithms import deepseek, deepseek_vec
+from sglang.srt.eplb.eplb_algorithms import deepseek, deepseek_vec
 
 
 class EplbAlgorithm(Enum):
diff --git a/python/sglang/srt/managers/eplb_algorithms/deepseek.py b/python/sglang/srt/eplb/eplb_algorithms/deepseek.py
similarity index 100%
rename from python/sglang/srt/managers/eplb_algorithms/deepseek.py
rename to python/sglang/srt/eplb/eplb_algorithms/deepseek.py
diff --git a/python/sglang/srt/managers/eplb_algorithms/deepseek_vec.py b/python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
similarity index 100%
rename from python/sglang/srt/managers/eplb_algorithms/deepseek_vec.py
rename to python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
diff --git a/python/sglang/srt/managers/eplb_manager.py b/python/sglang/srt/eplb/eplb_manager.py
similarity index 94%
rename from python/sglang/srt/managers/eplb_manager.py
rename to python/sglang/srt/eplb/eplb_manager.py
index b74b7f21e..604e2c464 100644
--- a/python/sglang/srt/managers/eplb_manager.py
+++ b/python/sglang/srt/eplb/eplb_manager.py
@@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, List
 
 import torch.cuda
 
-from sglang.srt.managers.expert_distribution import (
-    get_global_expert_distribution_recorder,
-)
-from sglang.srt.managers.expert_location import ExpertLocationMetadata
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
+from sglang.srt.eplb.expert_location import ExpertLocationMetadata
 
 if TYPE_CHECKING:
     from sglang.srt.model_executor.model_runner import ModelRunner
diff --git a/python/sglang/srt/eplb_simulator/__init__.py b/python/sglang/srt/eplb/eplb_simulator/__init__.py
similarity index 100%
rename from python/sglang/srt/eplb_simulator/__init__.py
rename to python/sglang/srt/eplb/eplb_simulator/__init__.py
diff --git a/python/sglang/srt/eplb_simulator/reader.py b/python/sglang/srt/eplb/eplb_simulator/reader.py
similarity index 97%
rename from python/sglang/srt/eplb_simulator/reader.py
rename to python/sglang/srt/eplb/eplb_simulator/reader.py
index 326542c79..97405c319 100644
--- a/python/sglang/srt/eplb_simulator/reader.py
+++ b/python/sglang/srt/eplb/eplb_simulator/reader.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import torch
 from tqdm import tqdm
 
-from sglang.srt.managers.expert_distribution import (
+from sglang.srt.eplb.expert_distribution import (
     _convert_global_physical_count_to_logical_count,
 )
 
diff --git a/python/sglang/srt/managers/expert_distribution.py b/python/sglang/srt/eplb/expert_distribution.py
similarity index 99%
rename from python/sglang/srt/managers/expert_distribution.py
rename to python/sglang/srt/eplb/expert_distribution.py
index 5cbafff73..83fd42250 100644
--- a/python/sglang/srt/managers/expert_distribution.py
+++ b/python/sglang/srt/eplb/expert_distribution.py
@@ -24,7 +24,7 @@ import einops
 import torch
 import torch.distributed
 
-from sglang.srt.managers.expert_location import ExpertLocationMetadata
+from sglang.srt.eplb.expert_location import ExpertLocationMetadata
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.server_args import ServerArgs
diff --git a/python/sglang/srt/managers/expert_location.py b/python/sglang/srt/eplb/expert_location.py
similarity index 99%
rename from python/sglang/srt/managers/expert_location.py
rename to python/sglang/srt/eplb/expert_location.py
index 13ba9849e..822429dc4 100644
--- a/python/sglang/srt/managers/expert_location.py
+++ b/python/sglang/srt/eplb/expert_location.py
@@ -23,7 +23,7 @@ import torch.distributed
 import torch.nn.functional as F
 
 from sglang.srt.configs.model_config import ModelConfig
-from sglang.srt.managers import eplb_algorithms
+from sglang.srt.eplb import eplb_algorithms
 from sglang.srt.model_loader import get_model_architecture
 from sglang.srt.server_args import ServerArgs
 
diff --git a/python/sglang/srt/managers/expert_location_dispatch.py b/python/sglang/srt/eplb/expert_location_dispatch.py
similarity index 97%
rename from python/sglang/srt/managers/expert_location_dispatch.py
rename to python/sglang/srt/eplb/expert_location_dispatch.py
index 92b9095e3..36224eee7 100644
--- a/python/sglang/srt/managers/expert_location_dispatch.py
+++ b/python/sglang/srt/eplb/expert_location_dispatch.py
@@ -17,7 +17,7 @@ from typing import Literal, Optional
 
 import torch
 
-from sglang.srt.managers.expert_location import get_global_expert_location_metadata
+from sglang.srt.eplb.expert_location import get_global_expert_location_metadata
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 
 
diff --git a/python/sglang/srt/model_executor/expert_location_updater.py b/python/sglang/srt/eplb/expert_location_updater.py
similarity index 99%
rename from python/sglang/srt/model_executor/expert_location_updater.py
rename to python/sglang/srt/eplb/expert_location_updater.py
index 5dce3d9a3..6fdeb0322 100644
--- a/python/sglang/srt/model_executor/expert_location_updater.py
+++ b/python/sglang/srt/eplb/expert_location_updater.py
@@ -20,7 +20,7 @@ import torch
 import torch.distributed
 from torch.distributed import P2POp
 
-from sglang.srt.managers.expert_location import (
+from sglang.srt.eplb.expert_location import (
     ExpertLocationMetadata,
     get_global_expert_location_metadata,
 )
diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py
index df228e528..d5cf3b568 100644
--- a/python/sglang/srt/layers/moe/ep_moe/layer.py
+++ b/python/sglang/srt/layers/moe/ep_moe/layer.py
@@ -11,6 +11,8 @@ from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
+from sglang.srt.eplb.expert_location import get_global_expert_location_metadata
+from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.layers.moe.ep_moe.kernels import (
     ep_gather,
     ep_scatter,
@@ -40,8 +42,6 @@ from sglang.srt.layers.quantization.fp8_kernel import (
     sglang_per_token_quant_fp8,
 )
 from sglang.srt.layers.quantization.fp8_utils import normalize_e4m3fn_to_e4m3fnuz
-from sglang.srt.managers.expert_location import get_global_expert_location_metadata
-from sglang.srt.managers.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import ForwardMode
 from sglang.srt.utils import (
diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
index 2c2c4d1f5..a3df8432c 100644
--- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
+++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
@@ -1,10 +1,8 @@
 import logging
 from dataclasses import dataclass
 
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
 from sglang.srt.layers.quantization import deep_gemm_wrapper
-from sglang.srt.managers.expert_distribution import (
-    get_global_expert_distribution_recorder,
-)
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.utils import (
     DeepEPMode,
diff --git a/python/sglang/srt/layers/moe/topk.py b/python/sglang/srt/layers/moe/topk.py
index 348758c57..610931cc8 100644
--- a/python/sglang/srt/layers/moe/topk.py
+++ b/python/sglang/srt/layers/moe/topk.py
@@ -18,12 +18,12 @@ from typing import Callable, Optional
 import torch
 import torch.nn.functional as F
 
-from sglang.srt.managers import expert_location_dispatch
-from sglang.srt.managers.expert_distribution import (
+from sglang.srt.eplb import expert_location_dispatch
+from sglang.srt.eplb.expert_distribution import (
     ExpertDistributionRecorder,
     get_global_expert_distribution_recorder,
 )
-from sglang.srt.managers.expert_location_dispatch import (
+from sglang.srt.eplb.expert_location_dispatch import (
     ExpertLocationDispatchInfo,
     topk_ids_logical_to_physical,
 )
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index b8364632f..e252f908c 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -58,6 +58,7 @@ from sglang.srt.disaggregation.utils import (
     prepare_abort,
 )
 from sglang.srt.distributed import get_pp_group, get_world_group
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
 from sglang.srt.hf_transformers_utils import (
     get_processor,
     get_tokenizer,
@@ -65,9 +66,6 @@ from sglang.srt.hf_transformers_utils import (
 )
 from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
-from sglang.srt.managers.expert_distribution import (
-    get_global_expert_distribution_recorder,
-)
 from sglang.srt.managers.io_struct import (
     AbortReq,
     CloseSessionReqInput,
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 277cab8df..8b9a367f4 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -39,6 +39,19 @@ from sglang.srt.distributed import (
     set_mscclpp_all_reduce,
 )
 from sglang.srt.distributed.parallel_state import monkey_patch_vllm_parallel_state
+from sglang.srt.eplb.eplb_manager import EPLBManager
+from sglang.srt.eplb.expert_distribution import (
+    ExpertDistributionRecorder,
+    get_global_expert_distribution_recorder,
+    set_global_expert_distribution_recorder,
+)
+from sglang.srt.eplb.expert_location import (
+    ExpertLocationMetadata,
+    compute_initial_expert_location_metadata,
+    get_global_expert_location_metadata,
+    set_global_expert_location_metadata,
+)
+from sglang.srt.eplb.expert_location_updater import ExpertLocationUpdater
 from sglang.srt.layers.attention.tbo_backend import TboAttnBackend
 from sglang.srt.layers.dp_attention import (
     get_attention_tp_group,
@@ -54,18 +67,6 @@ from sglang.srt.layers.sampler import Sampler
 from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model
 from sglang.srt.layers.utils import is_sm100_supported
 from sglang.srt.lora.lora_manager import LoRAManager
-from sglang.srt.managers.eplb_manager import EPLBManager
-from sglang.srt.managers.expert_distribution import (
-    ExpertDistributionRecorder,
-    get_global_expert_distribution_recorder,
-    set_global_expert_distribution_recorder,
-)
-from sglang.srt.managers.expert_location import (
-    ExpertLocationMetadata,
-    compute_initial_expert_location_metadata,
-    get_global_expert_location_metadata,
-    set_global_expert_location_metadata,
-)
 from sglang.srt.managers.schedule_batch import (
     GLOBAL_SERVER_ARGS_KEYS,
     global_server_args_dict,
@@ -84,7 +85,6 @@ from sglang.srt.mem_cache.memory_pool import (
     SWAKVPool,
 )
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
-from sglang.srt.model_executor.expert_location_updater import ExpertLocationUpdater
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
 from sglang.srt.model_loader import get_model
 from sglang.srt.model_loader.loader import DefaultModelLoader, get_model_loader
diff --git a/python/sglang/srt/models/deepseek_nextn.py b/python/sglang/srt/models/deepseek_nextn.py
index d83586358..e61dadadc 100644
--- a/python/sglang/srt/models/deepseek_nextn.py
+++ b/python/sglang/srt/models/deepseek_nextn.py
@@ -21,6 +21,7 @@ from torch import nn
 from transformers import PretrainedConfig
 
 from sglang.srt.distributed import get_tensor_model_parallel_world_size
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -28,9 +29,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
 )
-from sglang.srt.managers.expert_distribution import (
-    get_global_expert_distribution_recorder,
-)
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.models.deepseek_v2 import DeepseekV2DecoderLayer, DeepseekV3ForCausalLM
diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 79c7066df..f1ab8c3e7 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -32,6 +32,9 @@ from sglang.srt.distributed import (
     parallel_state,
     tensor_model_parallel_all_reduce,
 )
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
+from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
+from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.communicator import (
     LayerCommunicator,
@@ -77,11 +80,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
 )
-from sglang.srt.managers.expert_distribution import (
-    get_global_expert_distribution_recorder,
-)
-from sglang.srt.managers.expert_location import ModelConfigForExpertLocation
-from sglang.srt.managers.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader.weight_utils import default_weight_loader
diff --git a/python/sglang/srt/models/hunyuan.py b/python/sglang/srt/models/hunyuan.py
index 00300bed5..41a833f3d 100644
--- a/python/sglang/srt/models/hunyuan.py
+++ b/python/sglang/srt/models/hunyuan.py
@@ -28,6 +28,7 @@ from sglang.srt.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
+from sglang.srt.eplb.expert_distribution import ExpertDistributionRecorder
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
@@ -48,7 +49,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
 )
-from sglang.srt.managers.expert_distribution import ExpertDistributionRecorder
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader.weight_utils import (
     default_weight_loader,
diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py
index 67e72d465..0968ba0f4 100644
--- a/python/sglang/srt/models/qwen2_moe.py
+++ b/python/sglang/srt/models/qwen2_moe.py
@@ -31,6 +31,11 @@ from sglang.srt.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
+from sglang.srt.eplb.expert_distribution import (
+    ExpertDistributionRecorder,
+    get_global_expert_distribution_recorder,
+)
+from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.communicator import (
     LayerCommunicator,
@@ -64,11 +69,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
 )
-from sglang.srt.managers.expert_distribution import (
-    ExpertDistributionRecorder,
-    get_global_expert_distribution_recorder,
-)
-from sglang.srt.managers.expert_location import ModelConfigForExpertLocation
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
 from sglang.srt.model_loader.weight_utils import default_weight_loader
diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py
index f885500a9..c76326ec0 100644
--- a/python/sglang/srt/models/qwen3_moe.py
+++ b/python/sglang/srt/models/qwen3_moe.py
@@ -32,6 +32,9 @@ from sglang.srt.distributed import (
     tensor_model_parallel_all_gather,
     tensor_model_parallel_all_reduce,
 )
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
+from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
+from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes
 from sglang.srt.layers.dp_attention import (
@@ -63,11 +66,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
 )
-from sglang.srt.managers.expert_distribution import (
-    get_global_expert_distribution_recorder,
-)
-from sglang.srt.managers.expert_location import ModelConfigForExpertLocation
-from sglang.srt.managers.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import (
     ForwardBatch,
diff --git a/test/srt/test_expert_location_updater.py b/test/srt/test_expert_location_updater.py
index 2b1249b1b..094540294 100644
--- a/test/srt/test_expert_location_updater.py
+++ b/test/srt/test_expert_location_updater.py
@@ -9,7 +9,7 @@ import torch.distributed
 import torch.multiprocessing as mp
 from torch.multiprocessing import Process
 
-from sglang.srt.model_executor import expert_location_updater
+from sglang.srt.eplb import expert_location_updater
 from sglang.test.test_utils import CustomTestCase, find_available_port
 from sglang.utils import is_in_ci