[Lint] Style: Convert vllm-ascend/ to ruff format (Batch #6) (#6001)

### What this PR does / why we need it?

Batch #6 of the ruff-format migration: the files listed below are converted to ruff format, with no functional changes. A condensed sketch of the formatting conventions involved follows the version info below.

| File Path |
| :--- |
| `vllm_ascend/eplb/adaptor/abstract_adaptor.py` |
| `vllm_ascend/eplb/adaptor/vllm_adaptor.py` |
| `vllm_ascend/eplb/core/eplb_device_transfer_loader.py` |
| `vllm_ascend/eplb/core/eplb_utils.py` |
| `vllm_ascend/eplb/core/eplb_worker.py` |
| `vllm_ascend/eplb/core/policy/policy_abstract.py` |
| `vllm_ascend/eplb/core/policy/policy_default_eplb.py` |
| `vllm_ascend/eplb/core/policy/policy_factory.py` |
| `vllm_ascend/eplb/core/policy/policy_flashlb.py` |
| `vllm_ascend/eplb/core/policy/policy_random.py` |
| `vllm_ascend/eplb/core/policy/policy_swift_balancer.py` |
| `vllm_ascend/eplb/eplb_updator.py` |
| `vllm_ascend/eplb/utils.py` |
| `vllm_ascend/model_loader/netloader/executor/elastic_load.py` |
| `vllm_ascend/model_loader/netloader/executor/netloader_pg.py` |
| `vllm_ascend/model_loader/netloader/interaction/elastic.py` |
| `vllm_ascend/model_loader/netloader/load.py` |
| `vllm_ascend/model_loader/netloader/netloader.py` |
| `vllm_ascend/model_loader/netloader/utils.py` |
| `vllm_ascend/patch/platform/__init__.py` |
| `vllm_ascend/patch/platform/patch_balance_schedule.py` |
| `vllm_ascend/patch/platform/patch_ec_connector.py` |
| `vllm_ascend/patch/platform/patch_mamba_config.py` |
| `vllm_ascend/patch/platform/patch_multiproc_executor.py` |
| `vllm_ascend/patch/platform/patch_sched_yield.py` |


- vLLM version: v0.13.0
- vLLM main: 2c24bc6996
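As a rough, condensed illustration of what the conversion does (identifiers are borrowed from the diffs below; this snippet is illustrative and not itself part of the patch), ruff format joins statements that fit within the line limit, replaces backslash continuations with parenthesized wrapping, and uses the magic trailing comma to keep long collections one item per line:

```python
# Illustrative sketch only; not part of the patch. Identifiers echo the diffs below.

# Magic trailing comma: a trailing comma keeps a long collection exploded, one item per line.
expert_weight_names = [
    "w13_weight_list",
    "w2_weight_list",
    "w13_weight_scale_fp32_list",
    "w13_weight_offset",
    "w2_weight_scale_list",
    "w2_weight_offset",
]

# Backslash continuations become parenthesized wrapping when a line exceeds the limit
# (plain dicts stand in for the real model layers here).
param_dict: dict[str, list] = {}
model_layers = [{"w13_weight_scale_fp32_list": []} for _ in range(4)]
for i in range(len(model_layers)):
    param_dict["model.layers." + str(i) + ".mlp.experts." + "w13_weight_scale_fp32_list"] = (
        model_layers[i]["w13_weight_scale_fp32_list"]
    )


# Signatures that fit within the line length are joined back onto a single line.
def do_update_expert_map(layer_id: int, updated_expert_map: list) -> None:
    param_dict["expert_map." + str(layer_id)] = updated_expert_map
```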

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
Author: SILONG ZENG
Date: 2026-01-24 22:08:33 +08:00 (committed by GitHub)
Parent: 153da1a669
Commit: 4e53c1d900
26 changed files with 894 additions and 1148 deletions

#### `vllm_ascend/eplb/adaptor/abstract_adaptor.py`

@@ -19,8 +19,7 @@ from abc import abstractmethod
 from typing import Any
-class EplbAdaptor():
+class EplbAdaptor:
     def __init__(self, **args):
         pass
@@ -29,12 +28,9 @@ class EplbAdaptor():
         raise NotImplementedError
     @abstractmethod
-    def do_update_expert_map(self, layer_id: Any,
-                             updated_expert_map: Any) -> Any:
+    def do_update_expert_map(self, layer_id: Any, updated_expert_map: Any) -> Any:
         raise NotImplementedError
     @abstractmethod
-    def do_update_expert_weight(self, layer_id: Any,
-                                local_expert_to_replace: Any,
-                                buffer_tensor_id: Any) -> Any:
+    def do_update_expert_weight(self, layer_id: Any, local_expert_to_replace: Any, buffer_tensor_id: Any) -> Any:
         raise NotImplementedError
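For orientation, the reformatted class above is the abstract interface that concrete adaptors subclass. A minimal sketch under that assumption (the subclass name and no-op bodies are illustrative, and `EplbAdaptor` declares additional hooks not shown in this hunk):

```python
from typing import Any

from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor


class NoOpEplbAdaptor(EplbAdaptor):
    """Illustrative adaptor that ignores updates; VllmEplbAdaptor (next file) does the real work."""

    def do_update_expert_map(self, layer_id: Any, updated_expert_map: Any) -> Any:
        # A real adaptor copies the updated expert map into its per-layer caches.
        return None

    def do_update_expert_weight(self, layer_id: Any, local_expert_to_replace: Any, buffer_tensor_id: Any) -> Any:
        # A real adaptor copies the staged buffer tensors over the expert being replaced.
        return None
```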

#### `vllm_ascend/eplb/adaptor/vllm_adaptor.py`

@@ -26,7 +26,6 @@ from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor
 class VllmEplbAdaptor(EplbAdaptor):
     def __init__(self, model, **args):
         super().__init__(**args)
         self.model = model
@@ -36,33 +35,37 @@ class VllmEplbAdaptor(EplbAdaptor):
         self.num_dense_layers = getattr(self.model.config, "first_k_dense_replace", 0)
         self.num_moe_layers = self.model.config.num_hidden_layers - self.num_dense_layers
-        for i in range(self.num_dense_layers,
-                       self.model.config.num_hidden_layers):
-            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w13_weight_list"] = \
-                self.model.model.layers[i].mlp.experts.w13_weight_list
-            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w2_weight_list"] = \
-                self.model.model.layers[i].mlp.experts.w2_weight_list
-            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w13_weight_scale_fp32_list"] = \
+        for i in range(self.num_dense_layers, self.model.config.num_hidden_layers):
+            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w13_weight_list"] = self.model.model.layers[
+                i
+            ].mlp.experts.w13_weight_list
+            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w2_weight_list"] = self.model.model.layers[
+                i
+            ].mlp.experts.w2_weight_list
+            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w13_weight_scale_fp32_list"] = (
                 self.model.model.layers[i].mlp.experts.w13_weight_scale_fp32_list
-            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w2_weight_scale_list"] = \
+            )
+            self.param_dict["model.layers." + str(i) + ".mlp.experts." + "w2_weight_scale_list"] = (
                 self.model.model.layers[i].mlp.experts.w2_weight_scale_list
-        # TODO: init self.expert_weight_names depending on different model types, only deepseek v3 w8a8 and qwen3-moe is supported here
+            )
+        # TODO: init self.expert_weight_names depending on different model types.
+        # Only deepseek v3 w8a8 and qwen3-moe is supported here
         if self.model.quant_config is not None:
             self.expert_weight_names = [
-                "w13_weight_list", "w2_weight_list",
-                "w13_weight_scale_fp32_list", "w13_weight_offset",
-                "w2_weight_scale_list", "w2_weight_offset"
+                "w13_weight_list",
+                "w2_weight_list",
+                "w13_weight_scale_fp32_list",
+                "w13_weight_offset",
+                "w2_weight_scale_list",
+                "w2_weight_offset",
             ]
         else:
             self.expert_weight_names = ["w13_weight", "w2_weight"]
-        self.expert_map_per_layer_cpu = dict(
-        )  # copy of expert map on CPU to avoid device synchronize frequently
+        self.expert_map_per_layer_cpu = dict()  # copy of expert map on CPU to avoid device synchronize frequently
         num_buffer_tensor = self.model.model.layers[-1].mlp.experts.local_num_experts
-        self.buffer_tensor_list: list[list[Any]] = [
-            [] for _ in range(num_buffer_tensor)
-        ]
+        self.buffer_tensor_list: list[list[Any]] = [[] for _ in range(num_buffer_tensor)]
         self.init_buffer_tensor(num_buffer_tensor)
         self.expert_param_per_layer = dict()
@@ -70,18 +73,15 @@ class VllmEplbAdaptor(EplbAdaptor):
         self.log2phy_map_per_layer = dict()
         for layer_idx in range(self.num_moe_layers):
-            self.log2phy_map_per_layer[self.num_dense_layers + layer_idx] = \
-                self.model.get_log2phy_map(self.num_dense_layers + layer_idx)
+            self.log2phy_map_per_layer[self.num_dense_layers + layer_idx] = self.model.get_log2phy_map(
+                self.num_dense_layers + layer_idx
+            )
     def init_buffer_tensor(self, num_buffer_tensor):
         for buffer_id in range(num_buffer_tensor):
             for name in self.expert_weight_names:
-                complete_name = "model.layers." + str(
-                    self.num_dense_layers) + ".mlp.experts." + name
-                if name in [
-                        "w13_weight_list", "w2_weight_list",
-                        "w13_weight_scale_fp32_list", "w2_weight_scale_list"
-                ]:
+                complete_name = "model.layers." + str(self.num_dense_layers) + ".mlp.experts." + name
+                if name in ["w13_weight_list", "w2_weight_list", "w13_weight_scale_fp32_list", "w2_weight_scale_list"]:
                     expert_tensor = self.param_dict[complete_name][0]
                     expert_tensor = expert_tensor.clone()
                 else:
@@ -99,19 +99,20 @@ class VllmEplbAdaptor(EplbAdaptor):
                 per_expert_param = list()
                 for name in self.expert_weight_names:
                     if name in [
-                            "w13_weight_list", "w2_weight_list",
-                            "w13_weight_scale_fp32_list",
-                            "w2_weight_scale_list"
+                        "w13_weight_list",
+                        "w2_weight_list",
+                        "w13_weight_scale_fp32_list",
+                        "w2_weight_scale_list",
                     ]:
                         per_expert_param.append(
-                            self.param_dict["model.layers." + str(layer_idx) +
-                                            ".mlp.experts." +
-                                            name][local_expert_id])
+                            self.param_dict["model.layers." + str(layer_idx) + ".mlp.experts." + name][local_expert_id]
+                        )
                     else:
                         per_expert_param.append(
-                            self.param_dict["model.layers." + str(layer_idx) +
-                                            ".mlp.experts." +
-                                            name][0].data[local_expert_id])
+                            self.param_dict["model.layers." + str(layer_idx) + ".mlp.experts." + name][0].data[
+                                local_expert_id
+                            ]
+                        )
                 self.expert_param_per_layer[layer_idx].append(per_expert_param)
     def get_rank_expert_workload(self) -> torch.Tensor:
@@ -123,26 +124,18 @@ class VllmEplbAdaptor(EplbAdaptor):
         num_local_experts = expert_maps.max() + 1
         expert_maps_list = expert_maps.tolist()
-        record: dict[str, Any] = {
-            "moe_layer_count": len(expert_maps_list),
-            "layer_list": []
-        }
+        record: dict[str, Any] = {"moe_layer_count": len(expert_maps_list), "layer_list": []}
         for layer_idx, layer_data in enumerate(expert_maps_list):
             layer_record: dict[str, Any] = {
                 "layer_id": layer_idx,
                 "device_count": len(layer_data),
-                "device_list": []
+                "device_list": [],
             }
             for device_idx, experts in enumerate(layer_data):
-                placement = [
-                    experts.index(i) for i in range(num_local_experts)
-                ]
-                device_record = {
-                    "device_id": device_idx,
-                    "device_expert": placement
-                }
+                placement = [experts.index(i) for i in range(num_local_experts)]
+                device_record = {"device_id": device_idx, "device_expert": placement}
                 layer_record["device_list"].append(device_record)
             record["layer_list"].append(layer_record)
@@ -153,11 +146,10 @@ class VllmEplbAdaptor(EplbAdaptor):
     def do_update_expert_map(self, layer_id, updated_expert_map):
         self.expert_map_per_layer_cpu[layer_id].copy_(updated_expert_map)
-    def do_update_expert_weight(self, layer_id, local_expert_to_replace,
-                                buffer_tensor_id):
+    def do_update_expert_weight(self, layer_id, local_expert_to_replace, buffer_tensor_id):
         for expert_tensor, buffer_tensor in zip(
-                self.expert_param_per_layer[layer_id][local_expert_to_replace],
-                self.buffer_tensor_list[buffer_tensor_id]):
+            self.expert_param_per_layer[layer_id][local_expert_to_replace], self.buffer_tensor_list[buffer_tensor_id]
+        ):
             expert_tensor.copy_(buffer_tensor)
             logger.debug(f"Expert tensor shape is :{expert_tensor.shape}")
@@ -168,10 +160,8 @@ class VllmEplbAdaptor(EplbAdaptor):
     def get_global_expert_map(self):
         all_layer_global_expert_map = []
         for layer_id in range(self.num_moe_layers):
-            map_cpu = self.model.model.layers[
-                self.num_dense_layers + layer_id].mlp.experts.global_expert_map.cpu()
+            map_cpu = self.model.model.layers[self.num_dense_layers + layer_id].mlp.experts.global_expert_map.cpu()
             all_layer_global_expert_map.append(map_cpu)
-            self.expert_map_per_layer_cpu[self.num_dense_layers +
-                                          layer_id] = map_cpu[self.rank_id]
+            self.expert_map_per_layer_cpu[self.num_dense_layers + layer_id] = map_cpu[self.rank_id]
         return torch.stack(all_layer_global_expert_map)
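The expert-map serialization hunk above (the one that builds `record`) produces a nested structure describing, for every MoE layer and device, which global expert occupies each local slot. A self-contained sketch with toy data (the map shapes and values are invented for illustration; only the record keys and the slot-lookup logic come from the diff):

```python
# Toy reconstruction of the record layout built in the reformatted hunk above.
import json

# expert_maps[layer][device][global_expert_id] -> local slot id, or -1 if that
# device does not host the expert (toy example: 4 experts spread over 2 devices).
expert_maps = [
    [[0, 1, -1, -1], [-1, -1, 0, 1]],  # layer 0
    [[1, 0, -1, -1], [-1, -1, 1, 0]],  # layer 1
]
num_local_experts = max(max(max(row) for row in layer) for layer in expert_maps) + 1

record = {"moe_layer_count": len(expert_maps), "layer_list": []}
for layer_idx, layer_data in enumerate(expert_maps):
    layer_record = {"layer_id": layer_idx, "device_count": len(layer_data), "device_list": []}
    for device_idx, experts in enumerate(layer_data):
        # placement[i] = global expert id sitting in local slot i on this device
        placement = [experts.index(i) for i in range(num_local_experts)]
        layer_record["device_list"].append({"device_id": device_idx, "device_expert": placement})
    record["layer_list"].append(layer_record)

print(json.dumps(record, indent=2))
```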