Support redundant experts in expert parallel (#6461)
@@ -163,8 +163,7 @@ class ExpertLocationMetadata:
         num_physical_experts = (
             model_config_for_expert_location.num_logical_experts
-            # TODO pr-chain: enable this later
-            # + server_args.ep_num_redundant_experts
+            + server_args.ep_num_redundant_experts
         )
         ep_size = server_args.ep_size
         assert num_physical_experts % ep_size == 0
 
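For reference, the sizing rule this hunk enables can be read straight off the identifiers in the diff: the physical expert count is the logical expert count plus the redundant replicas, and the total must divide evenly across the expert-parallel ranks. A minimal standalone sketch of that arithmetic (not the repository's code; the helper name and the example numbers are illustrative):

    def physical_experts_per_rank(
        num_logical_experts: int,
        ep_num_redundant_experts: int,
        ep_size: int,
    ) -> int:
        # Physical experts = logical experts plus redundant replicas.
        num_physical_experts = num_logical_experts + ep_num_redundant_experts
        # Same invariant as the assert in the hunk above:
        # every EP rank hosts an equal share of physical experts.
        assert num_physical_experts % ep_size == 0
        return num_physical_experts // ep_size

    # Example: 256 logical experts + 32 redundant experts over ep_size=32
    # gives 9 physical experts per rank instead of 8.
    print(physical_experts_per_rank(256, 32, 32))  # -> 9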
@@ -90,6 +90,7 @@ global_server_args_dict = {
     "speculative_accept_threshold_single": ServerArgs.speculative_accept_threshold_single,
     "torchao_config": ServerArgs.torchao_config,
     "triton_attention_reduce_in_fp32": ServerArgs.triton_attention_reduce_in_fp32,
+    "ep_num_redundant_experts": ServerArgs.ep_num_redundant_experts,
 }
 
 logger = logging.getLogger(__name__)
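For context, global_server_args_dict is a plain dict keyed by server-arg name, so runtime code can look up the new setting without carrying a ServerArgs instance around. A small sketch of that pattern, using only the key shown in this hunk; the dataclass stand-in and the startup step are assumptions for illustration, not the repository's code:

    from dataclasses import dataclass

    @dataclass
    class ServerArgs:
        # Stand-in: only the field relevant to this PR.
        ep_num_redundant_experts: int = 0

    # Defaults come from the ServerArgs class attributes, as in the hunk above.
    global_server_args_dict = {
        "ep_num_redundant_experts": ServerArgs.ep_num_redundant_experts,
    }

    def apply_server_args(args: ServerArgs) -> None:
        # Assumed startup step: parsed arguments overwrite the defaults.
        global_server_args_dict["ep_num_redundant_experts"] = args.ep_num_redundant_experts

    # Expert-location code can then read the value from the dict.
    num_redundant = global_server_args_dict["ep_num_redundant_experts"]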