Bugfix: Expose the user policy type interface (#3336)
This PR makes two key changes:
1. Adjusts internal interface calls to streamline the interaction between the related modules.
2. Exposes an interface that lets users select the EPLB algorithm (the `eplb_policy_type` key in `additional_config`, default `1`), enabling more flexible configuration for specific usage scenarios.
These changes aim to improve usability while keeping internal operations stable. The relevant unit tests have been updated to cover the modified logic.
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0
---------
Signed-off-by: Che Ruan <cr623@ic.ac.uk>
Co-authored-by: Che Ruan <cr623@ic.ac.uk>
This commit is contained in:
@@ -53,6 +53,7 @@ class AscendConfig:
|
||||
|
||||
# TODO: Once https://github.com/vllm-project/vllm/issues/22246 is merged in vLLM, remove this config.
|
||||
self.expert_map_path = additional_config.get("expert_map_path", None)
|
||||
self.eplb_policy_type = additional_config.get("eplb_policy_type", 1)
|
||||
self.expert_map_record_path = additional_config.get(
|
||||
"expert_map_record_path",
|
||||
None) # Provide path to export expert map
|
||||
|
||||
@@ -73,8 +73,12 @@ class EplbWorker:
|
||||
new_expert_maps = self.local2global(new_placement)
|
||||
self.update_expert_map(new_expert_maps)
|
||||
|
||||
update_info = self.compose_expert_update_info_greedy(
|
||||
new_expert_maps, self.old_expert_maps)
|
||||
if self.policy_type == 2:
|
||||
update_info = self.compose_expert_update_info_bipartite(
|
||||
new_expert_maps, self.old_expert_maps)
|
||||
else:
|
||||
update_info = self.compose_expert_update_info_greedy(
|
||||
new_expert_maps, self.old_expert_maps)
|
||||
self.old_expert_maps = new_expert_maps
|
||||
logger.info("EPLB Process compute complete")
|
||||
|
||||
|
||||
@@ -470,6 +470,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
self.dynamic_eplb = self.ascend_config.dynamic_eplb
|
||||
if self.dynamic_eplb:
|
||||
self.is_eplb_warmuped = False
|
||||
self.policy_type = self.ascend_config.eplb_policy_type
|
||||
self.eplb_loader = D2DExpertWeightLoader()
|
||||
self.manager = Manager()
|
||||
self.shared_dict = self.manager.dict({
|
||||
@@ -478,7 +479,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
"expert_maps": None
|
||||
})
|
||||
self.eplb_process = EplbProcess(shared_dict=self.shared_dict,
|
||||
policy_type=1,
|
||||
policy_type=self.policy_type,
|
||||
enable_d2d=True)
|
||||
self.process = self.eplb_process._launch_process()
|
||||
ascend_config = get_ascend_config()
|
||||
|
||||
Reference in New Issue
Block a user