Bugfix: Expose the user policy type interface (#3336)

This PR focuses on two key changes:
1. Adjusts internal interface calls to streamline the interaction
between related modules.
2. Exposes an interface that allows users to select the EPLB algorithm,
enabling more flexible configuration based on specific usage scenarios.

These changes enhance the usability of the system while preserving
the stability of internal operations. The relevant unit tests have been
updated to cover the modified logic.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: Che Ruan <cr623@ic.ac.uk>
Co-authored-by: Che Ruan <cr623@ic.ac.uk>
This commit is contained in:
Mercykid-bash
2025-10-11 16:28:57 +08:00
committed by GitHub
parent e4acb2dfc7
commit ecb1713dfc
3 changed files with 9 additions and 3 deletions

View File

@@ -53,6 +53,7 @@ class AscendConfig:
# Todo: Once https://github.com/vllm-project/vllm/issues/22246 is merged in vllm. Remove this config
self.expert_map_path = additional_config.get("expert_map_path", None)
self.eplb_policy_type = additional_config.get("eplb_policy_type", 1)
self.expert_map_record_path = additional_config.get(
"expert_map_record_path",
None) # Provide path to export expert map

View File

@@ -73,8 +73,12 @@ class EplbWorker:
new_expert_maps = self.local2global(new_placement)
self.update_expert_map(new_expert_maps)
update_info = self.compose_expert_update_info_greedy(
new_expert_maps, self.old_expert_maps)
if self.policy_type == 2:
update_info = self.compose_expert_update_info_bipartite(
new_expert_maps, self.old_expert_maps)
else:
update_info = self.compose_expert_update_info_greedy(
new_expert_maps, self.old_expert_maps)
self.old_expert_maps = new_expert_maps
logger.info("EPLB Process compute complete")

View File

@@ -470,6 +470,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
self.dynamic_eplb = self.ascend_config.dynamic_eplb
if self.dynamic_eplb:
self.is_eplb_warmuped = False
self.policy_type = self.ascend_config.eplb_policy_type
self.eplb_loader = D2DExpertWeightLoader()
self.manager = Manager()
self.shared_dict = self.manager.dict({
@@ -478,7 +479,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
"expert_maps": None
})
self.eplb_process = EplbProcess(shared_dict=self.shared_dict,
policy_type=1,
policy_type=self.policy_type,
enable_d2d=True)
self.process = self.eplb_process._launch_process()
ascend_config = get_ascend_config()