[EPLB] Avoiding eplb's dependency on a specified model (#6528)

### What this PR does / why we need it?
1. Currently, eplb registers different attributes for different models,
but these attributes are not actually used. Now, these attributes are
directly deleted.
2. Add some logging related to eplb.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
#### Deepseek v3.1 chat
Of course! Here is a comprehensive explanation of deep learning, broken
down for clarity.\n\n### The Simple Analogy: A Child Learning to
Recognize a Cat\n\nImagine teaching a child what a cat is. You don't
give them a rulebook with instructions like \"has pointy ears, whiskers,
and a tail.\" Instead, you show them many pictures, saying \"this is a
cat\" or \"this is not a cat.\" The child's brain gradually learns to
identify the complex patterns—the combination of shapes, colors, and
textures—that define \"cat-ness.\"\n\n**Deep learning is essentially
this, but for computers.** It's a method for teaching computers to learn
from examples and recognize patterns directly from data (like images,
sound, or text) without being explicitly programmed with rigid
rules.\n\n---\n\n### The Technical Definition\n\n**Deep Learning is a
subfield of machine learning, which itself is a subfield of artificial
intelligence (AI).** It uses artificial **neural networks** with many
layers (\"deep\" networks) to model and understand complex patterns in
data.\n\nHere are the key concepts in that definition:\n\n1.
**Artificial Intelligence (AI):** The broad science of making machines
smart and capable of performing tasks that typically require human
intelligence.\n2. **Machine Learning (ML):** A subset of AI that gives
computers the ability to learn from data *without* being explicitly
programmed for every single rule.\n3. **Deep Learning (DL):** A
specific, powerful

- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
LI SHENGYONG
2026-02-10 15:58:44 +08:00
committed by GitHub
parent 7d4833bce9
commit 34eecacace
6 changed files with 21 additions and 71 deletions

View File

@@ -45,6 +45,7 @@ class TestAscendConfig(unittest.TestCase):
self.vllm_config = vllm_config
self.moe_config = moe_config
self.mock_npu = patch("torch.Tensor.npu", new=lambda self: self).start()
os.environ["DYNAMIC_EPLB"] = "true"
def test_init_eplb_config_with_eplb(self):
eplb_config = init_ascend_config(self.vllm_config).eplb_config
@@ -71,6 +72,6 @@ class TestAscendConfig(unittest.TestCase):
eplb_config = init_ascend_config(self.vllm_config).eplb_config
_, expert_map, log2phy, redundant_experts = init_eplb_config(eplb_config, 0, self.moe_config)
gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
print(expert_map, log2phy, redundant_experts)
self.assertIsNone(log2phy)
self.assertTrue(torch.equal(expert_map, gt_expert_map))
self.assertEqual(redundant_experts, 0)

View File

@@ -385,6 +385,7 @@ class EplbConfig:
def _validate_config(self):
if self.expert_map_path is not None:
logger.info(f"The expert_map is {self.config['dynamic_eplb']}")
if self.expert_map_path[-5:] != ".json":
raise TypeError("The expert_map is not json.")
if not os.path.exists(self.expert_map_path):
@@ -402,6 +403,14 @@ class EplbConfig:
raise ValueError(f"{key} must greater than 0; got {self.config[key]} instead")
if self.eplb_policy_type not in [0, 1, 2, 3]:
raise ValueError("eplb_policy_type must in [0, 1, 2, 3]")
if self.config["dynamic_eplb"]:
assert (
os.getenv("DYNAMIC_EPLB", "false").lower() in ("true", "1")
or os.getenv("EXPERT_MAP_RECORD", "false") == "true"
), "The environment variable DYNAMIC_EPLB or EXPERT_MAP_RECORD of the ePLB must be set to true."
logger.info(f"Dynamic EPLB is {self.config['dynamic_eplb']}")
logger.info(f"The number of redundant experts is {self.config['num_redundant_experts']}")
_ASCEND_CONFIG: AscendConfig | None = None

View File

@@ -1,36 +0,0 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# Todo: Once https://github.com/vllm-project/vllm/issues/22246 is merged in vllm. Remove this adaptor.
from abc import abstractmethod
from typing import Any
class EplbAdaptor:
    """Interface that EPLB expects an adaptor to implement.

    Concrete adaptors bridge EPLB's expert-rebalancing logic to a model
    runtime. This base class accepts (and ignores) arbitrary keyword
    arguments so subclasses can forward construction options freely.
    """

    def __init__(self, **args):
        # Intentionally a no-op: configuration is handled by subclasses.
        pass

    @abstractmethod
    def get_rank_expert_workload(self):
        """Return the per-rank expert workload statistics."""
        raise NotImplementedError

    @abstractmethod
    def do_update_expert_map(self, layer_id: Any, updated_expert_map: Any) -> Any:
        """Apply *updated_expert_map* to the MoE layer *layer_id*."""
        raise NotImplementedError

    @abstractmethod
    def do_update_expert_weight(self, layer_id: Any, local_expert_to_replace: Any, buffer_tensor_id: Any) -> Any:
        """Swap the weights of one local expert in layer *layer_id*."""
        raise NotImplementedError

View File

@@ -22,10 +22,8 @@ import torch
import torch.distributed as dist
from vllm.logger import logger
from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor
class VllmEplbAdaptor(EplbAdaptor):
class VllmEplbAdaptor:
def __init__(self, model, **args):
super().__init__(**args)
self.model = model

View File

@@ -28,47 +28,25 @@ def get_log2phy_map(self, layer_id):
return self.model.layers[layer_id].mlp.experts.get_log2phy_map()
def get_all_expert_map(self, num_moe_layers):
    """Collect the expert maps of all MoE layers into one stacked tensor.

    Args:
        num_moe_layers: Number of MoE layers to gather maps from.

    Returns:
        A tensor of shape (num_moe_layers, ...) stacking the per-layer
        expert maps along dim 0. Each element comes from
        ``self.get_expert_map`` — presumably of shape
        (num_experts_per_layer,), per the inline note; verify against
        that method's implementation.
    """
    all_loads = []
    # Models with leading dense layers expose num_dense_layers; default to
    # 0 when the attribute is absent so layer_id indexes from the start.
    num_dense_layers = self.num_dense_layers if hasattr(self, "num_dense_layers") else 0
    for layer_id in range(num_moe_layers):
        # Offset by the dense-layer count so we index actual MoE layers.
        load_tensor = self.get_expert_map(layer_id + num_dense_layers)  # (num_experts_per_layer,)
        all_loads.append(load_tensor)
    return torch.stack(all_loads, dim=0)
def get_all_moe_loads(self):
num_dense_layers = self.num_dense_layers if hasattr(self, "num_dense_layers") else 0
num_dense_layers = getattr(self.model.config, "first_k_dense_replace", 0)
num_layers = self.model.config.num_hidden_layers
all_moe_loads = torch.stack(
[
self.model.layers[layer_id + num_dense_layers].mlp.experts.moe_load
for layer_id in range(self.num_moe_layers)
],
[self.model.layers[layer_id].mlp.experts.moe_load for layer_id in range(num_dense_layers, num_layers)],
dim=0,
)
return all_moe_loads
def clear_all_moe_loads(self):
num_dense_layers = self.num_dense_layers if hasattr(self, "num_dense_layers") else 0
for layer_id in range(self.num_moe_layers):
self.model.layers[layer_id + num_dense_layers].mlp.experts.clear_moe_load()
num_dense_layers = getattr(self.model.config, "first_k_dense_replace", 0)
num_layers = self.model.config.num_hidden_layers
for layer_id in range(num_dense_layers, num_layers):
self.model.layers[layer_id].mlp.experts.clear_moe_load()
def model_register(model, model_config):
def model_register(model):
model.get_expert_map = types.MethodType(get_expert_map, model)
model.get_log2phy_map = types.MethodType(get_log2phy_map, model)
model.get_all_expert_map = types.MethodType(get_all_expert_map, model)
model.get_all_moe_loads = types.MethodType(get_all_moe_loads, model)
model.clear_all_moe_loads = types.MethodType(clear_all_moe_loads, model)
config = model_config.hf_text_config
if config.model_type == "qwen3_moe":
model.num_moe_layers = config.num_hidden_layers
elif config.model_type == "deepseek_v2" or config.model_type == "deepseek_v3":
model.num_dense_layers = config.first_k_dense_replace
model.num_moe_layers = config.num_hidden_layers - model.num_dense_layers
else:
raise NotImplementedError("EPLB is not supported.")

View File

@@ -2308,7 +2308,7 @@ class NPUModelRunner(GPUModelRunner):
with DeviceMemoryProfiler() as m: # noqa: SIM117
self.model = get_model(vllm_config=self.vllm_config)
if self.dynamic_eplb:
model_register(self.model, self.model_config)
model_register(self.model)
if self.drafter:
logger.info("Loading drafter model...")
with get_tp_context(self.drafter):